Initial commit
Browse files- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +1012 -3
- added_tokens.json +28 -0
- config.json +30 -0
- config_sentence_transformers.json +14 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +247 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -1,3 +1,1012 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- dense
|
7 |
+
- generated_from_trainer
|
8 |
+
- dataset_size:1792739
|
9 |
+
- loss:CachedGISTEmbedLoss
|
10 |
+
widget:
|
11 |
+
- source_sentence: 'The New START treaty maintains US nuclear and missile defence.
|
12 |
+
The US’ Nuclear armament will be modernized along with New START. “The Obama administration
|
13 |
+
has agreed to provide for modernization of the infrastructure essential to maintaining
|
14 |
+
our nuclear arsenal. Funding these efforts has become part of the negotiations
|
15 |
+
in the ratification process. The administration has put forth a 10-year plan to
|
16 |
+
spend $84 billion on the Energy Department''s nuclear weapons complex. Much of
|
17 |
+
the credit for getting the administration to add $14 billion to the originally
|
18 |
+
proposed $70 billion for modernization goes to Sen. Jon Kyl, the Arizona Republican
|
19 |
+
who has been vigilant in this effort. Implementing this modernization program
|
20 |
+
in a timely fashion would be important in ensuring that our nuclear arsenal is
|
21 |
+
maintained appropriately over the next decade and beyond.” [1] Both US Military
|
22 |
+
and civilian leaders insist that the new START treaty will still allow the US
|
23 |
+
to deploy effective missile defenses, something which Russia was opposed to, and
|
24 |
+
so will not affect US missile defense plans. The main limit on missile defense
|
25 |
+
is that the treaty prevents the conversion of existing launchers for this purpose
|
26 |
+
this would be more expensive than building new missiles specifically for defense
|
27 |
+
purposes. [2] Furthermore, as Joe Biden argues, New START is important to Russian
|
28 |
+
cooperation on missile defense: "This [missile defense] system demonstrates America''s
|
29 |
+
enduring commitment to Article 5 of the Washington Treaty—that an attack on one
|
30 |
+
is an attack on all. NATO missile defense also provides the opportunity for further
|
31 |
+
improvements in both NATO-Russian and U.S.-Russian relations. NATO and Russia
|
32 |
+
agreed at Lisbon to carry out a joint ballistic missile threat assessment, to
|
33 |
+
resume theater missile-defense exercises, and to explore further cooperation on
|
34 |
+
territorial missile defense—things that were nearly unimaginable two years ago.
|
35 |
+
These agreements underscore the strategic importance the alliance attaches to
|
36 |
+
improving its relationship with Russia. But trust and confidence in our relationship
|
37 |
+
with Russia would be undermined without Senate approval of the New Start Treaty,
|
38 |
+
which reduces strategic nuclear forces to levels not seen since the 1950s, and
|
39 |
+
restores important verification mechanisms that ceased when the first Start Treaty
|
40 |
+
expired last December." [3] In many ways, in the 21st Century having an abundance
|
41 |
+
of nuclear weapons, particularly having too many, is more of a liability than
|
42 |
+
an advantage. The United States will be far safer with fewer nuclear weapons in
|
43 |
+
the world and a stronger, more stable relationship with Russia under New START,
|
44 |
+
and this is desirable. Therefore it is clear that New START maintains the important
|
45 |
+
parts of US nuclear capabilities while removing the over-abundance which may become
|
46 |
+
a liability due to security and medical concerns, and so New START should be supported.
|
47 |
+
[1] Kissinger, Henry A. ; Shultz, George P. ; Baker III, James A’ ; Eagleburger
|
48 |
+
, Lawrence S. ; and Powell, Colin L. "The Republican case for ratifying New START".
|
49 |
+
Washington Post. 2 December 2010. [2] ibid [3] Biden, Joseph. "The case for ratifying
|
50 |
+
New START". Wall Street Journal. 25 November 2010.'
|
51 |
+
sentences:
|
52 |
+
- 'global law international law politics defence warpeace house supports new New
|
53 |
+
START will cause American missile and nuclear capabilities to atrophy, not to
|
54 |
+
be maintained. This is because it locks the US in to agreements of defensive reductions
|
55 |
+
which are tied into Russian offensive reductions. This could eventually leave
|
56 |
+
the US badly under-defended by its missile systems when compared against the offensive
|
57 |
+
capabilities of other nuclear states. Moreover, New START leaves in place the
|
58 |
+
pre-existing Russian tactical nuclear advantage harming US capabilities by comparison.
|
59 |
+
[1] Overall New START hams US missile and nuclear capabilities, and further advantages
|
60 |
+
Russia and other nuclear powers, and so should not be supported. As Mitt Romney
|
61 |
+
argued in 2010: "Does New START limit America’s options for missile defense? Yes.
|
62 |
+
For the first time, we would agree to an interrelationship between strategic offensive
|
63 |
+
weapons and missile defense. Moreover, Russia already asserts that the document
|
64 |
+
would constitute a binding limit on our missile defense program. But the WikiLeaks
|
65 |
+
revelation last weekend that North Korea has supplied Iran with long-range Russian
|
66 |
+
missiles confirms that robust missile defense is urgent and indispensable." [2]
|
67 |
+
[1] Spring, Baker. "Twelve Flaws of New START That Will Be Difficult to Fix".
|
68 |
+
Heritage Foundation, The Foundry. 16 September 2010. [2] Romney, Mitt. "Stop START."
|
69 |
+
Boston.com. 3 December 2010.'
|
70 |
+
- 'global law international law politics defence warpeace house supports new The
|
71 |
+
New START treaty maintains US nuclear and missile defence. The US’ Nuclear armament
|
72 |
+
will be modernized along with New START. “The Obama administration has agreed
|
73 |
+
to provide for modernization of the infrastructure essential to maintaining our
|
74 |
+
nuclear arsenal. Funding these efforts has become part of the negotiations in
|
75 |
+
the ratification process. The administration has put forth a 10-year plan to spend
|
76 |
+
$84 billion on the Energy Department''s nuclear weapons complex. Much of the credit
|
77 |
+
for getting the administration to add $14 billion to the originally proposed $70
|
78 |
+
billion for modernization goes to Sen. Jon Kyl, the Arizona Republican who has
|
79 |
+
been vigilant in this effort. Implementing this modernization program in a timely
|
80 |
+
fashion would be important in ensuring that our nuclear arsenal is maintained
|
81 |
+
appropriately over the next decade and beyond.” [1] Both US Military and civilian
|
82 |
+
leaders insist that the new START treaty will still allow the US to deploy effective
|
83 |
+
missile defenses, something which Russia was opposed to, and so will not affect
|
84 |
+
US missile defense plans. The main limit on missile defense is that the treaty
|
85 |
+
prevents the conversion of existing launchers for this purpose this would be more
|
86 |
+
expensive than building new missiles specifically for defense purposes. [2] Furthermore,
|
87 |
+
as Joe Biden argues, New START is important to Russian cooperation on missile
|
88 |
+
defense: "This [missile defense] system demonstrates America''s enduring commitment
|
89 |
+
to Article 5 of the Washington Treaty—that an attack on one is an attack on all.
|
90 |
+
NATO missile defense also provides the opportunity for further improvements in
|
91 |
+
both NATO-Russian and U.S.-Russian relations. NATO and Russia agreed at Lisbon
|
92 |
+
to carry out a joint ballistic missile threat assessment, to resume theater missile-defense
|
93 |
+
exercises, and to explore further cooperation on territorial missile defense—things
|
94 |
+
that were nearly unimaginable two years ago. These agreements underscore the strategic
|
95 |
+
importance the alliance attaches to improving its relationship with Russia. But
|
96 |
+
trust and confidence in our relationship with Russia would be undermined without
|
97 |
+
Senate approval of the New Start Treaty, which reduces strategic nuclear forces
|
98 |
+
to levels not seen since the 1950s, and restores important verification mechanisms
|
99 |
+
that ceased when the first Start Treaty expired last December." [3] In many ways,
|
100 |
+
in the 21st Century having an abundance of nuclear weapons, particularly having
|
101 |
+
too many, is more of a liability than an advantage. The United States will be
|
102 |
+
far safer with fewer nuclear weapons in the world and a stronger, more stable
|
103 |
+
relationship with Russia under New START, and this is desirable. Therefore it
|
104 |
+
is clear that New START maintains the important parts of US nuclear capabilities
|
105 |
+
while removing the over-abundance which may become a liability due to security
|
106 |
+
and medical concerns, and so New START should be supported. [1] Kissinger, Henry
|
107 |
+
A. ; Shultz, George P. ; Baker III, James A’ ; Eagleburger , Lawrence S. ; and
|
108 |
+
Powell, Colin L. "The Republican case for ratifying New START". Washington Post.
|
109 |
+
2 December 2010. [2] ibid [3] Biden, Joseph. "The case for ratifying New START".
|
110 |
+
Wall Street Journal. 25 November 2010.'
|
111 |
+
- political philosophy house believes civil liberties should be sacrificed If the
|
112 |
+
opposition is citing examples from history then there are just as many examples,
|
113 |
+
if not more, of western governments resisting the corrupting effects of increased
|
114 |
+
power and turning not from good into evil intentions. The fact of the matter is
|
115 |
+
that most of today’s western nations have a relatively good track record. It seems
|
116 |
+
the opposition is once again forgetting the real enemy – the terrorists. In most
|
117 |
+
Western countries we have a fully independent and liberal judiciary, vigorously
|
118 |
+
and vigilantly watching for human rights abuses and protecting civil liberties.
|
119 |
+
For nearly all Western countries, a slippery slope simply does not exist.
|
120 |
+
- source_sentence: Voting is a civic duty Other civic duties also exist “which are
|
121 |
+
recognised as necessary in order to live in a better, more cohesive, stable society”
|
122 |
+
1 like paying taxes, attending school, obeying road rules and, in some countries,
|
123 |
+
military conscription and jury duty. All of these obligatory activities require
|
124 |
+
far more time and effort than voting does, thus compulsory voting can be seen
|
125 |
+
as constituting a much smaller intrusion of freedom than any of these other activities.
|
126 |
+
The right to vote in a democracy has been fought for throughout modern history
|
127 |
+
. In the last century alone the soldiers of numerous wars and the suffragettes
|
128 |
+
of many countries fought and died for enfranchisement. It is our duty to respect
|
129 |
+
their sacrifice by voting. 1. Liberal Democrat Voice , 2006
|
130 |
+
sentences:
|
131 |
+
- y political philosophy politics government voting house would make voting Voting
|
132 |
+
is a civic duty Other civic duties also exist “which are recognised as necessary
|
133 |
+
in order to live in a better, more cohesive, stable society” 1 like paying taxes,
|
134 |
+
attending school, obeying road rules and, in some countries, military conscription
|
135 |
+
and jury duty. All of these obligatory activities require far more time and effort
|
136 |
+
than voting does, thus compulsory voting can be seen as constituting a much smaller
|
137 |
+
intrusion of freedom than any of these other activities. The right to vote in
|
138 |
+
a democracy has been fought for throughout modern history . In the last century
|
139 |
+
alone the soldiers of numerous wars and the suffragettes of many countries fought
|
140 |
+
and died for enfranchisement. It is our duty to respect their sacrifice by voting.
|
141 |
+
1. Liberal Democrat Voice , 2006
|
142 |
+
- y political philosophy politics government voting house would make voting A democracy
|
143 |
+
is based on the principle of respecting basic human rights, such as free choice.
|
144 |
+
This principle is directly violated by compulsory voting. With many civil rights
|
145 |
+
there is a choice to choose to engage in the activity or not. Voting has carries
|
146 |
+
that option, citizens of a democracy have the choice to either vote or not, despite
|
147 |
+
being encouraged to vote. It does not matter why a person chooses to vote or not,
|
148 |
+
it is the fact of principle that they have the right to choose. Compulsory voting
|
149 |
+
goes against such ideas of the freedom of choice, and on that grounds should not
|
150 |
+
be compulsory. The proposition speaks of those who died for the right to vote,
|
151 |
+
and respecting their sacrifice by voting. Unfortunately the proposition misconstrued
|
152 |
+
the point of their sacrifice- to give us the freedom of choice. That right of
|
153 |
+
choice must be upheld, as it is the cornerstone of a democratic society. Compulsory
|
154 |
+
voting would be infringing upon that.
|
155 |
+
- addiction healthcare international africa house believes ghanas ban smoking public
|
156 |
+
Even if such a link were true – the campaign against the ban on smoking in public
|
157 |
+
places in the UK accept that it’s unlikely that it is the primary cause of closures
|
158 |
+
in the UK 1 – the public health benefits would make it worth it. Reductions on
|
159 |
+
spending in some areas of the economy is likely to be balanced by increases elsewhere;
|
160 |
+
of course there will be losses in some industries – particularly tabacco itself
|
161 |
+
but those who stop smoking will have the money to spend elsewhere. Moreover the
|
162 |
+
economic effects are likely to be different in Africa; smoking outside in the
|
163 |
+
UK, bearing in mind the infamous British weather, is a far less attractive proposition
|
164 |
+
than smoking outdoors in many African countries. 1 'Why we want government to
|
165 |
+
amend the smoking ban', Save Our Pubs & Clubs,
|
166 |
+
- source_sentence: Racial quotas don’t develop new players The quota system could
|
167 |
+
lead to moving players from the regional teams who generally have less non-white
|
168 |
+
players pilfering them from other unions, rather “Home growing” them [1] . Former
|
169 |
+
Springboks coach Peter de Villiers, the first non-white person in that role, has
|
170 |
+
described quotas as a “waste of time [2] ”. Depending on the exact phraseology
|
171 |
+
of the rules, this could even allow black players from outside South Africa (from,
|
172 |
+
for example, England) to be used to fill the quota. [1] McGregor, Liz, ‘New Year,
|
173 |
+
new model for SA Rugby? Here’s hoping’, Books Live, 30 December 2013, [2] Peacock,
|
174 |
+
James, ‘Peter de Villiers says racial quotas are ‘waste of time’, BBC Sport, 15
|
175 |
+
August 2013,
|
176 |
+
sentences:
|
177 |
+
- ational africa sport team sports house supports racial quotas south african rugby
|
178 |
+
Racial quotas don’t develop new players The quota system could lead to moving
|
179 |
+
players from the regional teams who generally have less non-white players pilfering
|
180 |
+
them from other unions, rather “Home growing” them [1] . Former Springboks coach
|
181 |
+
Peter de Villiers, the first non-white person in that role, has described quotas
|
182 |
+
as a “waste of time [2] ”. Depending on the exact phraseology of the rules, this
|
183 |
+
could even allow black players from outside South Africa (from, for example, England)
|
184 |
+
to be used to fill the quota. [1] McGregor, Liz, ‘New Year, new model for SA Rugby?
|
185 |
+
Here’s hoping’, Books Live, 30 December 2013, [2] Peacock, James, ‘Peter de Villiers
|
186 |
+
says racial quotas are ‘waste of time’, BBC Sport, 15 August 2013,
|
187 |
+
- ational africa sport team sports house supports racial quotas south african rugby
|
188 |
+
Even if it doesn’t increase the numbers at the grass roots and youth levels, it
|
189 |
+
will create more players who can be selected by the provinces for Currie Cup competition.
|
190 |
+
This, in turn, could give more non-white players the development and the experience
|
191 |
+
they need to make it in to the national team.
|
192 |
+
- government house believes governance united states should be split between two
|
193 |
+
The reason why a febrile atmosphere has emerged in recent years is because both
|
194 |
+
red and blue single-party governments have made unpopular decisions without the
|
195 |
+
necessary checks being place upon it. This has made people disenchanted with the
|
196 |
+
political system and made them think that it is only looks out for ideological
|
197 |
+
elites, causing a backlash in the form of the Tea Party and Occupy movements.
|
198 |
+
[1] Divided Government combats this by helping to re-establish consensus between
|
199 |
+
the parties over what is best for America, ensuring that policies have the consent
|
200 |
+
of a majority of people, thus preventing the overtly ideological backlashes seen
|
201 |
+
recently. [1] Miles, Chris, ‘What the Tea Party and Occupy Wall Street Have in
|
202 |
+
Common’, policymic,
|
203 |
+
- source_sentence: 'Collisions are not as dangerous as they’re feared to be. Some
|
204 |
+
hits lead to injury, but the vast majority do not. One commentator challenged
|
205 |
+
proponents of a rule change “to name as many as five MLB catchers in the last
|
206 |
+
30 years who have had their careers ended or shortened as a result of a home plate
|
207 |
+
collision. Personally, I can’t think of one.” [1] In posing some—though not a
|
208 |
+
substantial—risk, home plate collisions are very much like other aspects of the
|
209 |
+
sport. Every time a pitcher throws a pitch, the batter could get struck and hurt.
|
210 |
+
Every time two outfielders converge on a fly ball, there’s a risk of injury. Baseball,
|
211 |
+
as with many other sports, inherently involves the risk of injury. It makes little
|
212 |
+
sense to focus on this play, which doesn’t often result in significant injury.
|
213 |
+
Moreover, catchers are trained to position their bodies in ways that minimize
|
214 |
+
the injury risk from crashes. [2] If catchers do as they’re trained, they’re very
|
215 |
+
unlikely to get hurt. [1] Joe Janish, “Buster Posey Aftermath: What Should Be
|
216 |
+
Done?,” On Baseball, May 30, 2011, (internal quotation marks omitted). [2] See,
|
217 |
+
for example, “Relays, Cutoffs, and Plays at Home,” Baseball-Catcher.com, .'
|
218 |
+
sentences:
|
219 |
+
- ss economy general international africa house believes africa really rising Bucking
|
220 |
+
this trend of increased HDI figures are the states who are currently witnessing,
|
221 |
+
or have recently experienced, armed conflict. Africa has observed many well-known
|
222 |
+
and lesser known conflicts which have damaged infrastructure and made it significantly
|
223 |
+
harder for local populations to access key services such as schools and healthcare.
|
224 |
+
Five of seven countries with the poorest nutritional scores are African and have
|
225 |
+
recently emerged from armed conflict [1] , they are also rated as some of the
|
226 |
+
poorest countries in the world. [1] Smith, ‘Africa is not rising’, 2013
|
227 |
+
- 'team sports house believes major league baseball should continue allow collisions
|
228 |
+
Collisions are not as dangerous as they’re feared to be. Some hits lead to injury,
|
229 |
+
but the vast majority do not. One commentator challenged proponents of a rule
|
230 |
+
change “to name as many as five MLB catchers in the last 30 years who have had
|
231 |
+
their careers ended or shortened as a result of a home plate collision. Personally,
|
232 |
+
I can’t think of one.” [1] In posing some—though not a substantial—risk, home
|
233 |
+
plate collisions are very much like other aspects of the sport. Every time a pitcher
|
234 |
+
throws a pitch, the batter could get struck and hurt. Every time two outfielders
|
235 |
+
converge on a fly ball, there’s a risk of injury. Baseball, as with many other
|
236 |
+
sports, inherently involves the risk of injury. It makes little sense to focus
|
237 |
+
on this play, which doesn’t often result in significant injury. Moreover, catchers
|
238 |
+
are trained to position their bodies in ways that minimize the injury risk from
|
239 |
+
crashes. [2] If catchers do as they’re trained, they’re very unlikely to get hurt.
|
240 |
+
[1] Joe Janish, “Buster Posey Aftermath: What Should Be Done?,” On Baseball, May
|
241 |
+
30, 2011, (internal quotation marks omitted). [2] See, for example, “Relays, Cutoffs,
|
242 |
+
and Plays at Home,” Baseball-Catcher.com, .'
|
243 |
+
- 'team sports house believes major league baseball should continue allow collisions
|
244 |
+
Collisions increase the risk of injury dramatically. Though it’s true that most
|
245 |
+
collisions do not result in significant injury, they result in a higher rate of
|
246 |
+
injury than almost any other baseball play. And just because a collision doesn’t
|
247 |
+
necessarily result in an injury that derails a player’s entire career does not
|
248 |
+
mean that it didn’t take a toll. This is especially true now that we’re learning
|
249 |
+
more about concussions, which might be suffered without someone immediately realizing
|
250 |
+
it. After a catcher on his team suffered a concussion in a collision, Yankees
|
251 |
+
manager Joe Girardi referred to this type of injury as “so unpredictable. That’s
|
252 |
+
what’s so scary.... You just don’t know what’s going to happen” with a concussion.
|
253 |
+
[1] When catchers are trained to block the plate, they’re taught how to reduce
|
254 |
+
the risk of injury, not how to eliminate the risk of injury. No matter how a catcher
|
255 |
+
positions himself, there will still be a risk of injury, and it will still be
|
256 |
+
much higher than for any other play in baseball. (Opposition Point #1 elaborates
|
257 |
+
more upon the risk of injury.) [1] Mark Feisand, “Yankees manager Joe Girardi
|
258 |
+
not counting out catcher Francisco Cervelli for postseason roster,” New York Daily
|
259 |
+
News, Sept. 17, 2011, .'
|
260 |
+
- source_sentence: 'In the public’s eyes, the government seems to suspect everyone.
|
261 |
+
Although the anti-terrorist measures are supposed to be trying to catch certain
|
262 |
+
people, it is the whole of the public who have to suffer on a daily basis: an
|
263 |
+
abundance of security cameras, security checks, and anti-privacy measures continually
|
264 |
+
invade innocent people’s lives and yet it is supposed to be the terrorists who
|
265 |
+
are being punished. The issue of justice, and whether it is actually being done,
|
266 |
+
has to be fully looked at properly. These measures are not solving the problem
|
267 |
+
of terrorism as it does not address the core grievances. Instead other ways such
|
268 |
+
as negotiation to address grievances is necessary, as happened in Northern Ireland
|
269 |
+
[1] . [1] Bowcott, Owen, ‘Northern Ireland’, The Guardian, 11 May 2007, , accessed
|
270 |
+
9 September 2011'
|
271 |
+
sentences:
|
272 |
+
- europe global human rights house believes european union should lift its Lifting
|
273 |
+
the arms ban will not strengthen China militarily. Not only would sales be restricted
|
274 |
+
by the new code of conduct, but China’s Ambassador to the European Union has also
|
275 |
+
clearly reiterated "We have stated several times that we do not intend to buy
|
276 |
+
European military equipment" as "We cannot afford to buy such expensive weapons".
|
277 |
+
[1] Even if China was sold high-tech European equipment, this could even be beneficial
|
278 |
+
for the EU as it would make China dependent on the EU for such items and make
|
279 |
+
it less likely to pursue its own research and development programmes. [1] Rufino,
|
280 |
+
Filipe and Vucheva, Elitsa, ‘EU Arms Embargo is ‘political discrimination, says
|
281 |
+
Chinese Ambassador’, 2005.
|
282 |
+
- 'political philosophy house believes civil liberties should be sacrificed In the
|
283 |
+
public’s eyes, the government seems to suspect everyone. Although the anti-terrorist
|
284 |
+
measures are supposed to be trying to catch certain people, it is the whole of
|
285 |
+
the public who have to suffer on a daily basis: an abundance of security cameras,
|
286 |
+
security checks, and anti-privacy measures continually invade innocent people’s
|
287 |
+
lives and yet it is supposed to be the terrorists who are being punished. The
|
288 |
+
issue of justice, and whether it is actually being done, has to be fully looked
|
289 |
+
at properly. These measures are not solving the problem of terrorism as it does
|
290 |
+
not address the core grievances. Instead other ways such as negotiation to address
|
291 |
+
grievances is necessary, as happened in Northern Ireland [1] . [1] Bowcott, Owen,
|
292 |
+
‘Northern Ireland’, The Guardian, 11 May 2007, , accessed 9 September 2011'
|
293 |
+
- political philosophy house believes civil liberties should be sacrificed This
|
294 |
+
is just like any other investigation. Obviously the government has to take a broad
|
295 |
+
approach because any loophole could be exploited by the unscrupulous terrorist.
|
296 |
+
It is a necessity, albeit one with unfortunate consequences, but a necessity all
|
297 |
+
the same. As for negotiations with terrorists, it is the propositions view that
|
298 |
+
this option does not exist when dealing with terrorists of a fundamentalist background,
|
299 |
+
who are, by definition, not willing to compromise and therefore unable to be negotiated
|
300 |
+
with.
|
301 |
+
pipeline_tag: sentence-similarity
|
302 |
+
library_name: sentence-transformers
|
303 |
+
metrics:
|
304 |
+
- cosine_accuracy
|
305 |
+
model-index:
|
306 |
+
- name: SentenceTransformer
|
307 |
+
results:
|
308 |
+
- task:
|
309 |
+
type: triplet
|
310 |
+
name: Triplet
|
311 |
+
dataset:
|
312 |
+
name: dev koen
|
313 |
+
type: dev_koen
|
314 |
+
metrics:
|
315 |
+
- type: cosine_accuracy
|
316 |
+
value: 0.6882352828979492
|
317 |
+
name: Cosine Accuracy
|
318 |
+
---
|
319 |
+
|
320 |
+
# SentenceTransformer
|
321 |
+
|
322 |
+
This is a [sentence-transformers](https://www.SBERT.net) model trained on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
323 |
+
|
324 |
+
## Model Details
|
325 |
+
|
326 |
+
### Model Description
|
327 |
+
- **Model Type:** Sentence Transformer
|
328 |
+
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
|
329 |
+
- **Maximum Sequence Length:** 3072 tokens
|
330 |
+
- **Output Dimensionality:** 1024 dimensions
|
331 |
+
- **Similarity Function:** Cosine Similarity
|
332 |
+
- **Training Dataset:**
|
333 |
+
- json
|
334 |
+
<!-- - **Language:** Unknown -->
|
335 |
+
<!-- - **License:** Unknown -->
|
336 |
+
|
337 |
+
### Model Sources
|
338 |
+
|
339 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
340 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
341 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
342 |
+
|
343 |
+
### Full Model Architecture
|
344 |
+
|
345 |
+
```
|
346 |
+
SentenceTransformer(
|
347 |
+
(0): Transformer({'max_seq_length': 3072, 'do_lower_case': False, 'architecture': 'Qwen3Model'})
|
348 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
|
349 |
+
(2): Normalize()
|
350 |
+
)
|
351 |
+
```
|
352 |
+
|
353 |
+
## Usage
|
354 |
+
|
355 |
+
### Direct Usage (Sentence Transformers)
|
356 |
+
|
357 |
+
First install the Sentence Transformers library:
|
358 |
+
|
359 |
+
```bash
|
360 |
+
pip install -U sentence-transformers
|
361 |
+
```
|
362 |
+
|
363 |
+
Then you can load this model and run inference.
|
364 |
+
```python
|
365 |
+
from sentence_transformers import SentenceTransformer
|
366 |
+
|
367 |
+
# Download from the 🤗 Hub
|
368 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
369 |
+
# Run inference
|
370 |
+
queries = [
|
371 |
+
"In the public’s eyes, the government seems to suspect everyone. Although the anti-terrorist measures are supposed to be trying to catch certain people, it is the whole of the public who have to suffer on a daily basis: an abundance of security cameras, security checks, and anti-privacy measures continually invade innocent people’s lives and yet it is supposed to be the terrorists who are being punished. The issue of justice, and whether it is actually being done, has to be fully looked at properly. These measures are not solving the problem of terrorism as it does not address the core grievances. Instead other ways such as negotiation to address grievances is necessary, as happened in Northern Ireland [1] . [1] Bowcott, Owen, ‘Northern Ireland’, The Guardian, 11 May 2007, , accessed 9 September 2011",
|
372 |
+
]
|
373 |
+
documents = [
|
374 |
+
'political philosophy house believes civil liberties should be sacrificed This is just like any other investigation. Obviously the government has to take a broad approach because any loophole could be exploited by the unscrupulous terrorist. It is a necessity, albeit one with unfortunate consequences, but a necessity all the same. As for negotiations with terrorists, it is the propositions view that this option does not exist when dealing with terrorists of a fundamentalist background, who are, by definition, not willing to compromise and therefore unable to be negotiated with.',
|
375 |
+
'political philosophy house believes civil liberties should be sacrificed In the public’s eyes, the government seems to suspect everyone. Although the anti-terrorist measures are supposed to be trying to catch certain people, it is the whole of the public who have to suffer on a daily basis: an abundance of security cameras, security checks, and anti-privacy measures continually invade innocent people’s lives and yet it is supposed to be the terrorists who are being punished. The issue of justice, and whether it is actually being done, has to be fully looked at properly. These measures are not solving the problem of terrorism as it does not address the core grievances. Instead other ways such as negotiation to address grievances is necessary, as happened in Northern Ireland [1] . [1] Bowcott, Owen, ‘Northern Ireland’, The Guardian, 11 May 2007, , accessed 9 September 2011',
|
376 |
+
'europe global human rights house believes european union should lift its Lifting the arms ban will not strengthen China militarily. Not only would sales be restricted by the new code of conduct, but China’s Ambassador to the European Union has also clearly reiterated "We have stated several times that we do not intend to buy European military equipment" as "We cannot afford to buy such expensive weapons". [1] Even if China was sold high-tech European equipment, this could even be beneficial for the EU as it would make China dependent on the EU for such items and make it less likely to pursue its own research and development programmes. [1] Rufino, Filipe and Vucheva, Elitsa, ‘EU Arms Embargo is ‘political discrimination, says Chinese Ambassador’, 2005.',
|
377 |
+
]
|
378 |
+
query_embeddings = model.encode_query(queries)
|
379 |
+
document_embeddings = model.encode_document(documents)
|
380 |
+
print(query_embeddings.shape, document_embeddings.shape)
|
381 |
+
# (1, 1024) (3, 1024)
|
382 |
+
|
383 |
+
# Get the similarity scores for the embeddings
|
384 |
+
similarities = model.similarity(query_embeddings, document_embeddings)
|
385 |
+
print(similarities)
|
386 |
+
# tensor([[0.5128, 0.9208, 0.2109]])
|
387 |
+
```
|
388 |
+
|
389 |
+
<!--
|
390 |
+
### Direct Usage (Transformers)
|
391 |
+
|
392 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
393 |
+
|
394 |
+
</details>
|
395 |
+
-->
|
396 |
+
|
397 |
+
<!--
|
398 |
+
### Downstream Usage (Sentence Transformers)
|
399 |
+
|
400 |
+
You can finetune this model on your own dataset.
|
401 |
+
|
402 |
+
<details><summary>Click to expand</summary>
|
403 |
+
|
404 |
+
</details>
|
405 |
+
-->
|
406 |
+
|
407 |
+
<!--
|
408 |
+
### Out-of-Scope Use
|
409 |
+
|
410 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
411 |
+
-->
|
412 |
+
|
413 |
+
## Evaluation
|
414 |
+
|
415 |
+
### Metrics
|
416 |
+
|
417 |
+
#### Triplet
|
418 |
+
|
419 |
+
* Dataset: `dev_koen`
|
420 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
421 |
+
|
422 |
+
| Metric | Value |
|
423 |
+
|:--------------------|:-----------|
|
424 |
+
| **cosine_accuracy** | **0.6882** |
|
425 |
+
|
426 |
+
<!--
|
427 |
+
## Bias, Risks and Limitations
|
428 |
+
|
429 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
430 |
+
-->
|
431 |
+
|
432 |
+
<!--
|
433 |
+
### Recommendations
|
434 |
+
|
435 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
436 |
+
-->
|
437 |
+
|
438 |
+
## Training Details
|
439 |
+
|
440 |
+
### Training Dataset
|
441 |
+
|
442 |
+
#### json
|
443 |
+
|
444 |
+
* Dataset: json
|
445 |
+
* Size: 1,792,739 training samples
|
446 |
+
* Columns: <code>anchor</code>, <code>positive</code>, <code>negative_1</code>, <code>negative_2</code>, and <code>negative_3</code>
|
447 |
+
* Approximate statistics based on the first 1000 samples:
|
448 |
+
| | anchor | positive | negative_1 | negative_2 | negative_3 |
|
449 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
450 |
+
| type | string | string | string | string | string |
|
451 |
+
| details | <ul><li>min: 2 tokens</li><li>mean: 23.31 tokens</li><li>max: 140 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 66.84 tokens</li><li>max: 108 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 63.01 tokens</li><li>max: 110 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 62.48 tokens</li><li>max: 110 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 62.98 tokens</li><li>max: 106 tokens</li></ul> |
|
452 |
+
* Samples:
|
453 |
+
| anchor | positive | negative_1 | negative_2 | negative_3 |
|
454 |
+
|:-------------------------------------------------------------------|:-------------------------------------------------------|:--------------------------------------------------------------|:------------------------------------------------------------|:-----------------------------------------------------------|
|
455 |
+
| <code>ATP란?</code> | <code>아데노신 삼인산 아데노신 삼인산(, ATP)은 생명체의 주된 에너지원이다.</code> | <code>ATP ATP는 다음 뜻의 약자이다.</code> | <code>해당 실제로 ADP는 ADPMg로, ATP는 ATPMg로 존재한다.</code> | <code>ATE ATE는 다음을 가리킨다.</code> |
|
456 |
+
| <code>난촨구와 둥촨구는 어느 나라에 위치해 있습니까?</code> | <code>난촨구(南川区)는 중국 충칭의 구이자 이전의 현이다.</code> | <code>남풍현(南丰县)은 중국 장시성(江西省) 푸저우(福州)에 위치한 군이다.</code> | <code>도교, 광둥 도교(道滘)는 중국 남부 광둥성 동관 시의 관할 하에 있는 도시입니다.</code> | <code>동포구 동포구는 중국 쓰촨성의 구역입니다. 이곳은 메이산시의 관할 하에 있습니다.</code> |
|
457 |
+
| <code>가짜대나무(Pseudosasa)와 별꽃(Cerastium)은 모두 자생 식물과 관련이 있습니까?</code> | <code>가짜사사(Pseudosasa)는 풀과에 속하는 동아시아 대나무의 속입니다.</code> | <code>세팔로소루스(Cephalosorus)는 데이지 과에 속하는 꽃이 피는 식물의 속입니다.</code> | <code>가짜기생충속(Pseudoparasitus)은 라엘라피다에 속하는 진드기의 속입니다.</code> | <code>페리타사(Peritassa)는 쐐기풀과(Celastraceae) 식물의 속입니다.</code> |
|
458 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
459 |
+
```json
|
460 |
+
{
|
461 |
+
"guide": "SentenceTransformer('dragonkue/snowflake-arctic-embed-l-v2.0-ko')",
|
462 |
+
"temperature": 0.01,
|
463 |
+
"mini_batch_size": 8,
|
464 |
+
"margin_strategy": "absolute",
|
465 |
+
"margin": 0.05
|
466 |
+
}
|
467 |
+
```
|
468 |
+
|
469 |
+
### Evaluation Dataset
|
470 |
+
|
471 |
+
#### json
|
472 |
+
|
473 |
+
* Dataset: json
|
474 |
+
* Size: 13,430 evaluation samples
|
475 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
476 |
+
* Approximate statistics based on the first 1000 samples:
|
477 |
+
| | anchor | positive | negative |
|
478 |
+
|:--------|:--------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
479 |
+
| type | string | string | string |
|
480 |
+
| details | <ul><li>min: 54 tokens</li><li>mean: 244.48 tokens</li><li>max: 1090 tokens</li></ul> | <ul><li>min: 37 tokens</li><li>mean: 164.02 tokens</li><li>max: 658 tokens</li></ul> | <ul><li>min: 65 tokens</li><li>mean: 254.71 tokens</li><li>max: 1100 tokens</li></ul> |
|
481 |
+
* Samples:
|
482 |
+
| anchor | positive | negative |
|
483 |
+
|:-------|:---------|:---------|
|
484 |
+
| <code>Being vegetarian helps the environment Becoming a vegetarian is an environmentally friendly thing to do. Modern farming is one of the main sources of pollution in our rivers. Beef farming is one of the main causes of deforestation, and as long as people continue to buy fast food in their billions, there will be a financial incentive to continue cutting down trees to make room for cattle. Because of our desire to eat fish, our rivers and seas are being emptied of fish and many species are facing extinction. Energy resources are used up much more greedily by meat farming than my farming cereals, pulses etc. Eating meat and fish not only causes cruelty to animals, it causes serious harm to the environment and to biodiversity. For example consider Meat production related pollution and deforestation At Toronto’s 1992 Royal Agricultural Winter Fair, Agriculture Canada displayed two contrasting statistics: “it takes four football fields of land (about 1.6 hectares) to feed each Canadian” and ...</code> | <code>animals environment general health health general weight philosophy ethics You don’t have to be vegetarian to be green. Many special environments have been created by livestock farming – for example chalk down land in England and mountain pastures in many countries. Ending livestock farming would see these areas go back to woodland with a loss of many unique plants and animals. Growing crops can also be very bad for the planet, with fertilisers and pesticides polluting rivers, lakes and seas. Most tropical forests are now cut down for timber, or to allow oil palm trees to be grown in plantations, not to create space for meat production. British farmer and former editor Simon Farrell also states: “Many vegans and vegetarians rely on one source from the U.N. calculation that livestock generates 18% of global carbon emissions, but this figure contains basic mistakes. It attributes all deforestation from ranching to cattle, rather than logging or development. It also muddles up one-off emi...</code> | <code>animals environment general health health general weight philosophy ethics Being vegetarian helps the environment Becoming a vegetarian is an environmentally friendly thing to do. Modern farming is one of the main sources of pollution in our rivers. Beef farming is one of the main causes of deforestation, and as long as people continue to buy fast food in their billions, there will be a financial incentive to continue cutting down trees to make room for cattle. Because of our desire to eat fish, our rivers and seas are being emptied of fish and many species are facing extinction. Energy resources are used up much more greedily by meat farming than my farming cereals, pulses etc. Eating meat and fish not only causes cruelty to animals, it causes serious harm to the environment and to biodiversity. For example consider Meat production related pollution and deforestation At Toronto’s 1992 Royal Agricultural Winter Fair, Agriculture Canada displayed two contrasting statistics: “it takes fo...</code> |
|
485 |
+
| <code>It is immoral to kill animals As evolved human beings it is our moral duty to inflict as little pain as possible for our survival. So if we do not need to inflict pain to animals in order to survive, we should not do it. Farm animals such as chickens, pigs, sheep, and cows are sentient living beings like us - they are our evolutionary cousins and like us they can feel pleasure and pain. The 18th century utilitarian philosopher Jeremy Bentham even believed that animal suffering was just as serious as human suffering and likened the idea of human superiority to racism. It is wrong to farm and kill these animals for food when we do not need to do so. The methods of farming and slaughter of these animals are often barbaric and cruel - even on supposedly 'free range' farms. [1] Ten billion animals were slaughtered for human consumption each year, stated PETA. And unlike the farms long time ago, where animals roamed freely, today, most animals are factory farmed: crammed into cages where th...</code> | <code>animals environment general health health general weight philosophy ethics There is a great moral difference between humans and animals. Unlike animals, humans are capable of rational thought and can alter the world around them. Other creatures were put on this earth for mankind to use, and that includes eating meat. For all these reasons we say that men and women have rights and that animals don’t. This means that eating meat is in no way like murder. It is natural for human beings to farm, kill, and eat other species. In the wild there is a brutal struggle for existence. The fact that we humans have succeeded in that struggle by exploiting our natural environment means that we have a natural right over lower species. In fact farming animals is much less brutal than the pain and hardship that animals inflict on each other naturally in the wild. Eating meat does not need to mean cruelty to animals. There are a growing number of organic and free-range farms that can provide meat without...</code> | <code>animals environment general health health general weight philosophy ethics It is immoral to kill animals As evolved human beings it is our moral duty to inflict as little pain as possible for our survival. So if we do not need to inflict pain to animals in order to survive, we should not do it. Farm animals such as chickens, pigs, sheep, and cows are sentient living beings like us - they are our evolutionary cousins and like us they can feel pleasure and pain. The 18th century utilitarian philosopher Jeremy Bentham even believed that animal suffering was just as serious as human suffering and likened the idea of human superiority to racism. It is wrong to farm and kill these animals for food when we do not need to do so. The methods of farming and slaughter of these animals are often barbaric and cruel - even on supposedly 'free range' farms. [1] Ten billion animals were slaughtered for human consumption each year, stated PETA. And unlike the farms long time ago, where animals roamed f...</code> |
|
486 |
+
| <code>Vegetarianism is healthier There are significant health benefits to 'going veggie'; a vegetarian diet contains high quantities of fibre, vitamins, and minerals, and is low in fat. (A vegan diet is even better since eggs and dairy products are high in cholesterol.) The risk of contracting many forms of cancer is increased by eating meat: in 1996 the American Cancer Society recommended that red meat should be excluded from the diet entirely. Eating meat also increases the risk of heart disease - vegetables contain no cholesterol, which can build up to cause blocked arteries in meat-eaters. An American study found out that: “that men in the highest quintile of red-meat consumption — those who ate about 5 oz. of red meat a day, roughly the equivalent of a small steak had a 31% higher risk of death over a 10-year period than men in the lowest-consumption quintile, who ate less than 1 oz. of red meat per day, or approximately three slices of corned beef.” [1] A vegetarian diet reduces the ri...</code> | <code>animals environment general health health general weight philosophy ethics The key to good health is a balanced diet, not a meat- and fish-free diet. Meat and fish are good sources of protein, iron, and other vitamins and minerals. Most of the health benefits of a vegetarian diet derive from its being high in fibre and low in fat and cholesterol. These can be achieved by avoiding fatty and fried foods, eating only lean grilled meat and fish, and including a large amount of fruit and vegetables in your diet along with meat and fish. In general, raw, unprocessed meat from the muscle is made up of the following: protein 15 - 22 % Fat 3 - 15 % Minerals, carbohydrates 1 - 5 % Water 65 - 75 %, all things that we need in moderation. [1] A meat- and fish-free diet is unbalanced and makes it more likely that you will go short of protein, iron and some minerals such as B12 for which we are primarily dependent on animal foodstuffs. Also, a vegetarian diet, in the West, is a more expensive option ...</code> | <code>animals environment general health health general weight philosophy ethics Vegetarianism is healthier There are significant health benefits to 'going veggie'; a vegetarian diet contains high quantities of fibre, vitamins, and minerals, and is low in fat. (A vegan diet is even better since eggs and dairy products are high in cholesterol.) The risk of contracting many forms of cancer is increased by eating meat: in 1996 the American Cancer Society recommended that red meat should be excluded from the diet entirely. Eating meat also increases the risk of heart disease - vegetables contain no cholesterol, which can build up to cause blocked arteries in meat-eaters. An American study found out that: “that men in the highest quintile of red-meat consumption — those who ate about 5 oz. of red meat a day, roughly the equivalent of a small steak had a 31% higher risk of death over a 10-year period than men in the lowest-consumption quintile, who ate less than 1 oz. of red meat per day, or appro...</code> |
|
487 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
488 |
+
```json
|
489 |
+
{
|
490 |
+
"guide": "SentenceTransformer('dragonkue/snowflake-arctic-embed-l-v2.0-ko')",
|
491 |
+
"temperature": 0.01,
|
492 |
+
"mini_batch_size": 8,
|
493 |
+
"margin_strategy": "absolute",
|
494 |
+
"margin": 0.05
|
495 |
+
}
|
496 |
+
```
|
497 |
+
|
498 |
+
### Training Hyperparameters
|
499 |
+
#### Non-Default Hyperparameters
|
500 |
+
|
501 |
+
- `eval_strategy`: steps
|
502 |
+
- `per_device_train_batch_size`: 2048
|
503 |
+
- `per_device_eval_batch_size`: 16
|
504 |
+
- `learning_rate`: 2e-05
|
505 |
+
- `warmup_ratio`: 0.05
|
506 |
+
- `bf16`: True
|
507 |
+
- `ddp_find_unused_parameters`: True
|
508 |
+
- `ddp_timeout`: 7200
|
509 |
+
- `prompts`: {'anchor': 'Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:'}
|
510 |
+
- `batch_sampler`: no_duplicates
|
511 |
+
|
512 |
+
#### All Hyperparameters
|
513 |
+
<details><summary>Click to expand</summary>
|
514 |
+
|
515 |
+
- `overwrite_output_dir`: False
|
516 |
+
- `do_predict`: False
|
517 |
+
- `eval_strategy`: steps
|
518 |
+
- `prediction_loss_only`: True
|
519 |
+
- `per_device_train_batch_size`: 2048
|
520 |
+
- `per_device_eval_batch_size`: 16
|
521 |
+
- `per_gpu_train_batch_size`: None
|
522 |
+
- `per_gpu_eval_batch_size`: None
|
523 |
+
- `gradient_accumulation_steps`: 1
|
524 |
+
- `eval_accumulation_steps`: None
|
525 |
+
- `torch_empty_cache_steps`: None
|
526 |
+
- `learning_rate`: 2e-05
|
527 |
+
- `weight_decay`: 0.0
|
528 |
+
- `adam_beta1`: 0.9
|
529 |
+
- `adam_beta2`: 0.999
|
530 |
+
- `adam_epsilon`: 1e-08
|
531 |
+
- `max_grad_norm`: 1.0
|
532 |
+
- `num_train_epochs`: 3
|
533 |
+
- `max_steps`: -1
|
534 |
+
- `lr_scheduler_type`: linear
|
535 |
+
- `lr_scheduler_kwargs`: {}
|
536 |
+
- `warmup_ratio`: 0.05
|
537 |
+
- `warmup_steps`: 0
|
538 |
+
- `log_level`: passive
|
539 |
+
- `log_level_replica`: warning
|
540 |
+
- `log_on_each_node`: True
|
541 |
+
- `logging_nan_inf_filter`: True
|
542 |
+
- `save_safetensors`: True
|
543 |
+
- `save_on_each_node`: False
|
544 |
+
- `save_only_model`: False
|
545 |
+
- `restore_callback_states_from_checkpoint`: False
|
546 |
+
- `no_cuda`: False
|
547 |
+
- `use_cpu`: False
|
548 |
+
- `use_mps_device`: False
|
549 |
+
- `seed`: 42
|
550 |
+
- `data_seed`: None
|
551 |
+
- `jit_mode_eval`: False
|
552 |
+
- `use_ipex`: False
|
553 |
+
- `bf16`: True
|
554 |
+
- `fp16`: False
|
555 |
+
- `fp16_opt_level`: O1
|
556 |
+
- `half_precision_backend`: auto
|
557 |
+
- `bf16_full_eval`: False
|
558 |
+
- `fp16_full_eval`: False
|
559 |
+
- `tf32`: None
|
560 |
+
- `local_rank`: 3
|
561 |
+
- `ddp_backend`: None
|
562 |
+
- `tpu_num_cores`: None
|
563 |
+
- `tpu_metrics_debug`: False
|
564 |
+
- `debug`: []
|
565 |
+
- `dataloader_drop_last`: True
|
566 |
+
- `dataloader_num_workers`: 0
|
567 |
+
- `dataloader_prefetch_factor`: None
|
568 |
+
- `past_index`: -1
|
569 |
+
- `disable_tqdm`: False
|
570 |
+
- `remove_unused_columns`: True
|
571 |
+
- `label_names`: None
|
572 |
+
- `load_best_model_at_end`: False
|
573 |
+
- `ignore_data_skip`: False
|
574 |
+
- `fsdp`: []
|
575 |
+
- `fsdp_min_num_params`: 0
|
576 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
577 |
+
- `tp_size`: 0
|
578 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
579 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
580 |
+
- `deepspeed`: None
|
581 |
+
- `label_smoothing_factor`: 0.0
|
582 |
+
- `optim`: adamw_torch
|
583 |
+
- `optim_args`: None
|
584 |
+
- `adafactor`: False
|
585 |
+
- `group_by_length`: False
|
586 |
+
- `length_column_name`: length
|
587 |
+
- `ddp_find_unused_parameters`: True
|
588 |
+
- `ddp_bucket_cap_mb`: None
|
589 |
+
- `ddp_broadcast_buffers`: False
|
590 |
+
- `dataloader_pin_memory`: True
|
591 |
+
- `dataloader_persistent_workers`: False
|
592 |
+
- `skip_memory_metrics`: True
|
593 |
+
- `use_legacy_prediction_loop`: False
|
594 |
+
- `push_to_hub`: False
|
595 |
+
- `resume_from_checkpoint`: None
|
596 |
+
- `hub_model_id`: None
|
597 |
+
- `hub_strategy`: every_save
|
598 |
+
- `hub_private_repo`: None
|
599 |
+
- `hub_always_push`: False
|
600 |
+
- `gradient_checkpointing`: False
|
601 |
+
- `gradient_checkpointing_kwargs`: None
|
602 |
+
- `include_inputs_for_metrics`: False
|
603 |
+
- `include_for_metrics`: []
|
604 |
+
- `eval_do_concat_batches`: True
|
605 |
+
- `fp16_backend`: auto
|
606 |
+
- `push_to_hub_model_id`: None
|
607 |
+
- `push_to_hub_organization`: None
|
608 |
+
- `mp_parameters`:
|
609 |
+
- `auto_find_batch_size`: False
|
610 |
+
- `full_determinism`: False
|
611 |
+
- `torchdynamo`: None
|
612 |
+
- `ray_scope`: last
|
613 |
+
- `ddp_timeout`: 7200
|
614 |
+
- `torch_compile`: False
|
615 |
+
- `torch_compile_backend`: None
|
616 |
+
- `torch_compile_mode`: None
|
617 |
+
- `include_tokens_per_second`: False
|
618 |
+
- `include_num_input_tokens_seen`: False
|
619 |
+
- `neftune_noise_alpha`: None
|
620 |
+
- `optim_target_modules`: None
|
621 |
+
- `batch_eval_metrics`: False
|
622 |
+
- `eval_on_start`: False
|
623 |
+
- `use_liger_kernel`: False
|
624 |
+
- `eval_use_gather_object`: False
|
625 |
+
- `average_tokens_across_devices`: False
|
626 |
+
- `prompts`: {'anchor': 'Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:'}
|
627 |
+
- `batch_sampler`: no_duplicates
|
628 |
+
- `multi_dataset_batch_sampler`: proportional
|
629 |
+
- `router_mapping`: {}
|
630 |
+
- `learning_rate_mapping`: {}
|
631 |
+
|
632 |
+
</details>
|
633 |
+
|
634 |
+
### Training Logs
|
635 |
+
<details><summary>Click to expand</summary>
|
636 |
+
|
637 |
+
| Epoch | Step | Training Loss | Validation Loss | dev_koen_cosine_accuracy |
|
638 |
+
|:------:|:----:|:-------------:|:---------------:|:------------------------:|
|
639 |
+
| -1 | -1 | - | - | 0.6315 |
|
640 |
+
| 0.0092 | 1 | 1.2859 | - | - |
|
641 |
+
| 0.0183 | 2 | 1.0815 | - | - |
|
642 |
+
| 0.0275 | 3 | 1.0462 | - | - |
|
643 |
+
| 0.0367 | 4 | 0.9539 | - | - |
|
644 |
+
| 0.0459 | 5 | 0.7947 | - | - |
|
645 |
+
| 0.0550 | 6 | 0.7215 | - | - |
|
646 |
+
| 0.0642 | 7 | 0.6427 | - | - |
|
647 |
+
| 0.0734 | 8 | 0.5985 | - | - |
|
648 |
+
| 0.0826 | 9 | 0.5854 | - | - |
|
649 |
+
| 0.0917 | 10 | 0.5374 | - | - |
|
650 |
+
| 0.1009 | 11 | 0.5199 | - | - |
|
651 |
+
| 0.1101 | 12 | 0.4608 | - | - |
|
652 |
+
| 0.1193 | 13 | 0.4599 | - | - |
|
653 |
+
| 0.1284 | 14 | 0.4337 | - | - |
|
654 |
+
| 0.1376 | 15 | 0.4132 | - | - |
|
655 |
+
| 0.1468 | 16 | 0.4188 | - | - |
|
656 |
+
| 0.1560 | 17 | 0.3887 | - | - |
|
657 |
+
| 0.1651 | 18 | 0.3626 | - | - |
|
658 |
+
| 0.1743 | 19 | 0.3577 | - | - |
|
659 |
+
| 0.1835 | 20 | 0.3332 | - | - |
|
660 |
+
| 0.1927 | 21 | 0.3139 | - | - |
|
661 |
+
| 0.2018 | 22 | 0.2989 | - | - |
|
662 |
+
| 0.2110 | 23 | 0.2947 | - | - |
|
663 |
+
| 0.2202 | 24 | 0.2724 | - | - |
|
664 |
+
| 0.2294 | 25 | 0.2521 | - | - |
|
665 |
+
| 0.2385 | 26 | 0.225 | - | - |
|
666 |
+
| 0.2477 | 27 | 0.2285 | - | - |
|
667 |
+
| 0.2569 | 28 | 0.2326 | - | - |
|
668 |
+
| 0.2661 | 29 | 0.2138 | - | - |
|
669 |
+
| 0.2752 | 30 | 0.2075 | - | - |
|
670 |
+
| 0.2844 | 31 | 0.2148 | - | - |
|
671 |
+
| 0.2936 | 32 | 0.2054 | - | - |
|
672 |
+
| 0.3028 | 33 | 0.2117 | - | - |
|
673 |
+
| 0.3119 | 34 | 0.1802 | - | - |
|
674 |
+
| 0.3211 | 35 | 0.206 | - | - |
|
675 |
+
| 0.3303 | 36 | 0.1961 | - | - |
|
676 |
+
| 0.3394 | 37 | 0.1782 | - | - |
|
677 |
+
| 0.3486 | 38 | 0.1746 | - | - |
|
678 |
+
| 0.3578 | 39 | 0.1822 | - | - |
|
679 |
+
| 0.3670 | 40 | 0.17 | - | - |
|
680 |
+
| 0.3761 | 41 | 0.1636 | - | - |
|
681 |
+
| 0.3853 | 42 | 0.1617 | - | - |
|
682 |
+
| 0.3945 | 43 | 0.1409 | - | - |
|
683 |
+
| 0.4037 | 44 | 0.1451 | - | - |
|
684 |
+
| 0.4128 | 45 | 0.1359 | - | - |
|
685 |
+
| 0.4220 | 46 | 0.1289 | - | - |
|
686 |
+
| 0.4312 | 47 | 0.1356 | - | - |
|
687 |
+
| 0.4404 | 48 | 0.1326 | - | - |
|
688 |
+
| 0.4495 | 49 | 0.1239 | - | - |
|
689 |
+
| 0.4587 | 50 | 0.1279 | - | - |
|
690 |
+
| 0.4679 | 51 | 0.1105 | - | - |
|
691 |
+
| 0.4771 | 52 | 0.124 | - | - |
|
692 |
+
| 0.4862 | 53 | 0.1115 | - | - |
|
693 |
+
| 0.4954 | 54 | 0.1115 | - | - |
|
694 |
+
| 0.5046 | 55 | 0.1111 | 0.2269 | 0.6678 |
|
695 |
+
| 0.5138 | 56 | 0.1072 | - | - |
|
696 |
+
| 0.5229 | 57 | 0.1052 | - | - |
|
697 |
+
| 0.5321 | 58 | 0.1113 | - | - |
|
698 |
+
| 0.5413 | 59 | 0.1025 | - | - |
|
699 |
+
| 0.5505 | 60 | 0.0998 | - | - |
|
700 |
+
| 0.5596 | 61 | 0.1095 | - | - |
|
701 |
+
| 0.5688 | 62 | 0.1001 | - | - |
|
702 |
+
| 0.5780 | 63 | 0.111 | - | - |
|
703 |
+
| 0.5872 | 64 | 0.105 | - | - |
|
704 |
+
| 0.5963 | 65 | 0.1303 | - | - |
|
705 |
+
| 0.6055 | 66 | 0.1214 | - | - |
|
706 |
+
| 0.6147 | 67 | 0.1243 | - | - |
|
707 |
+
| 0.6239 | 68 | 0.1234 | - | - |
|
708 |
+
| 0.6330 | 69 | 0.1201 | - | - |
|
709 |
+
| 0.6422 | 70 | 0.1156 | - | - |
|
710 |
+
| 0.6514 | 71 | 0.1268 | - | - |
|
711 |
+
| 0.6606 | 72 | 0.1224 | - | - |
|
712 |
+
| 0.6697 | 73 | 0.1227 | - | - |
|
713 |
+
| 0.6789 | 74 | 0.1153 | - | - |
|
714 |
+
| 0.6881 | 75 | 0.1133 | - | - |
|
715 |
+
| 0.6972 | 76 | 0.1218 | - | - |
|
716 |
+
| 0.7064 | 77 | 0.1369 | - | - |
|
717 |
+
| 0.7156 | 78 | 0.128 | - | - |
|
718 |
+
| 0.7248 | 79 | 0.1352 | - | - |
|
719 |
+
| 0.7339 | 80 | 0.1379 | - | - |
|
720 |
+
| 0.7431 | 81 | 0.1468 | - | - |
|
721 |
+
| 0.7523 | 82 | 0.1489 | - | - |
|
722 |
+
| 0.7615 | 83 | 0.1512 | - | - |
|
723 |
+
| 0.7706 | 84 | 0.1424 | - | - |
|
724 |
+
| 0.7798 | 85 | 0.1507 | - | - |
|
725 |
+
| 0.7890 | 86 | 0.157 | - | - |
|
726 |
+
| 0.7982 | 87 | 0.1513 | - | - |
|
727 |
+
| 0.8073 | 88 | 0.163 | - | - |
|
728 |
+
| 0.8165 | 89 | 0.1471 | - | - |
|
729 |
+
| 0.8257 | 90 | 0.1636 | - | - |
|
730 |
+
| 0.8349 | 91 | 0.1681 | - | - |
|
731 |
+
| 0.8440 | 92 | 0.1693 | - | - |
|
732 |
+
| 0.8532 | 93 | 0.1835 | - | - |
|
733 |
+
| 0.8624 | 94 | 0.1716 | - | - |
|
734 |
+
| 0.8716 | 95 | 0.1817 | - | - |
|
735 |
+
| 0.8807 | 96 | 0.1999 | - | - |
|
736 |
+
| 0.8899 | 97 | 0.21 | - | - |
|
737 |
+
| 0.8991 | 98 | 0.2278 | - | - |
|
738 |
+
| 0.9083 | 99 | 0.2273 | - | - |
|
739 |
+
| 0.9174 | 100 | 0.2469 | - | - |
|
740 |
+
| 0.9266 | 101 | 0.2742 | - | - |
|
741 |
+
| 0.9358 | 102 | 0.2805 | - | - |
|
742 |
+
| 0.9450 | 103 | 0.2932 | - | - |
|
743 |
+
| 0.9541 | 104 | 0.2915 | - | - |
|
744 |
+
| 0.9633 | 105 | 0.2814 | - | - |
|
745 |
+
| 0.9725 | 106 | 0.3003 | - | - |
|
746 |
+
| 0.9817 | 107 | 0.3346 | - | - |
|
747 |
+
| 0.9908 | 108 | 0.315 | - | - |
|
748 |
+
| 1.0 | 109 | 0.9444 | - | - |
|
749 |
+
| 1.0092 | 110 | 0.5552 | 0.1111 | 0.7004 |
|
750 |
+
| 1.0183 | 111 | 0.4133 | - | - |
|
751 |
+
| 1.0275 | 112 | 0.3811 | - | - |
|
752 |
+
| 1.0367 | 113 | 0.3525 | - | - |
|
753 |
+
| 1.0459 | 114 | 0.2925 | - | - |
|
754 |
+
| 1.0550 | 115 | 0.2563 | - | - |
|
755 |
+
| 1.0642 | 116 | 0.2417 | - | - |
|
756 |
+
| 1.0734 | 117 | 0.2073 | - | - |
|
757 |
+
| 1.0826 | 118 | 0.2073 | - | - |
|
758 |
+
| 1.0917 | 119 | 0.1958 | - | - |
|
759 |
+
| 1.1009 | 120 | 0.1842 | - | - |
|
760 |
+
| 1.1101 | 121 | 0.1733 | - | - |
|
761 |
+
| 1.1193 | 122 | 0.1799 | - | - |
|
762 |
+
| 1.1284 | 123 | 0.15 | - | - |
|
763 |
+
| 1.1376 | 124 | 0.1476 | - | - |
|
764 |
+
| 1.1468 | 125 | 0.1507 | - | - |
|
765 |
+
| 1.1560 | 126 | 0.1448 | - | - |
|
766 |
+
| 1.1651 | 127 | 0.1197 | - | - |
|
767 |
+
| 1.1743 | 128 | 0.1206 | - | - |
|
768 |
+
| 1.1835 | 129 | 0.1223 | - | - |
|
769 |
+
| 1.1927 | 130 | 0.102 | - | - |
|
770 |
+
| 1.2018 | 131 | 0.0991 | - | - |
|
771 |
+
| 1.2110 | 132 | 0.1023 | - | - |
|
772 |
+
| 1.2202 | 133 | 0.0914 | - | - |
|
773 |
+
| 1.2294 | 134 | 0.085 | - | - |
|
774 |
+
| 1.2385 | 135 | 0.0575 | - | - |
|
775 |
+
| 1.2477 | 136 | 0.062 | - | - |
|
776 |
+
| 1.2569 | 137 | 0.0674 | - | - |
|
777 |
+
| 1.2661 | 138 | 0.0617 | - | - |
|
778 |
+
| 1.2752 | 139 | 0.0554 | - | - |
|
779 |
+
| 1.2844 | 140 | 0.0541 | - | - |
|
780 |
+
| 1.2936 | 141 | 0.0605 | - | - |
|
781 |
+
| 1.3028 | 142 | 0.0535 | - | - |
|
782 |
+
| 1.3119 | 143 | 0.0431 | - | - |
|
783 |
+
| 1.3211 | 144 | 0.0527 | - | - |
|
784 |
+
| 1.3303 | 145 | 0.0497 | - | - |
|
785 |
+
| 1.3394 | 146 | 0.046 | - | - |
|
786 |
+
| 1.3486 | 147 | 0.036 | - | - |
|
787 |
+
| 1.3578 | 148 | 0.0456 | - | - |
|
788 |
+
| 1.3670 | 149 | 0.0441 | - | - |
|
789 |
+
| 1.3761 | 150 | 0.0347 | - | - |
|
790 |
+
| 1.3853 | 151 | 0.0367 | - | - |
|
791 |
+
| 1.3945 | 152 | 0.0287 | - | - |
|
792 |
+
| 1.4037 | 153 | 0.0265 | - | - |
|
793 |
+
| 1.4128 | 154 | 0.0233 | - | - |
|
794 |
+
| 1.4220 | 155 | 0.0222 | - | - |
|
795 |
+
| 1.4312 | 156 | 0.0295 | - | - |
|
796 |
+
| 1.4404 | 157 | 0.027 | - | - |
|
797 |
+
| 1.4495 | 158 | 0.0233 | - | - |
|
798 |
+
| 1.4587 | 159 | 0.0243 | - | - |
|
799 |
+
| 1.4679 | 160 | 0.019 | - | - |
|
800 |
+
| 1.4771 | 161 | 0.0233 | - | - |
|
801 |
+
| 1.4862 | 162 | 0.0209 | - | - |
|
802 |
+
| 1.4954 | 163 | 0.0211 | - | - |
|
803 |
+
| 1.5046 | 164 | 0.0178 | - | - |
|
804 |
+
| 1.5138 | 165 | 0.0186 | 0.1471 | 0.6915 |
|
805 |
+
| 1.5229 | 166 | 0.0181 | - | - |
|
806 |
+
| 1.5321 | 167 | 0.0214 | - | - |
|
807 |
+
| 1.5413 | 168 | 0.0179 | - | - |
|
808 |
+
| 1.5505 | 169 | 0.0168 | - | - |
|
809 |
+
| 1.5596 | 170 | 0.0206 | - | - |
|
810 |
+
| 1.5688 | 171 | 0.0175 | - | - |
|
811 |
+
| 1.5780 | 172 | 0.0207 | - | - |
|
812 |
+
| 1.5872 | 173 | 0.0213 | - | - |
|
813 |
+
| 1.5963 | 174 | 0.0291 | - | - |
|
814 |
+
| 1.6055 | 175 | 0.0263 | - | - |
|
815 |
+
| 1.6147 | 176 | 0.0264 | - | - |
|
816 |
+
| 1.6239 | 177 | 0.0265 | - | - |
|
817 |
+
| 1.6330 | 178 | 0.0257 | - | - |
|
818 |
+
| 1.6422 | 179 | 0.0215 | - | - |
|
819 |
+
| 1.6514 | 180 | 0.0275 | - | - |
|
820 |
+
| 1.6606 | 181 | 0.0244 | - | - |
|
821 |
+
| 1.6697 | 182 | 0.0288 | - | - |
|
822 |
+
| 1.6789 | 183 | 0.0234 | - | - |
|
823 |
+
| 1.6881 | 184 | 0.0225 | - | - |
|
824 |
+
| 1.6972 | 185 | 0.0286 | - | - |
|
825 |
+
| 1.7064 | 186 | 0.0333 | - | - |
|
826 |
+
| 1.7156 | 187 | 0.0317 | - | - |
|
827 |
+
| 1.7248 | 188 | 0.0338 | - | - |
|
828 |
+
| 1.7339 | 189 | 0.0344 | - | - |
|
829 |
+
| 1.7431 | 190 | 0.0367 | - | - |
|
830 |
+
| 1.7523 | 191 | 0.0355 | - | - |
|
831 |
+
| 1.7615 | 192 | 0.0434 | - | - |
|
832 |
+
| 1.7706 | 193 | 0.0369 | - | - |
|
833 |
+
| 1.7798 | 194 | 0.0383 | - | - |
|
834 |
+
| 1.7890 | 195 | 0.0416 | - | - |
|
835 |
+
| 1.7982 | 196 | 0.0434 | - | - |
|
836 |
+
| 1.8073 | 197 | 0.0465 | - | - |
|
837 |
+
| 1.8165 | 198 | 0.0387 | - | - |
|
838 |
+
| 1.8257 | 199 | 0.0508 | - | - |
|
839 |
+
| 1.8349 | 200 | 0.0485 | - | - |
|
840 |
+
| 1.8440 | 201 | 0.0487 | - | - |
|
841 |
+
| 1.8532 | 202 | 0.056 | - | - |
|
842 |
+
| 1.8624 | 203 | 0.0517 | - | - |
|
843 |
+
| 1.8716 | 204 | 0.0523 | - | - |
|
844 |
+
| 1.8807 | 205 | 0.0711 | - | - |
|
845 |
+
| 1.8899 | 206 | 0.0727 | - | - |
|
846 |
+
| 1.8991 | 207 | 0.0836 | - | - |
|
847 |
+
| 1.9083 | 208 | 0.0782 | - | - |
|
848 |
+
| 1.9174 | 209 | 0.0955 | - | - |
|
849 |
+
| 1.9266 | 210 | 0.1165 | - | - |
|
850 |
+
| 1.9358 | 211 | 0.1154 | - | - |
|
851 |
+
| 1.9450 | 212 | 0.1302 | - | - |
|
852 |
+
| 1.9541 | 213 | 0.1276 | - | - |
|
853 |
+
| 1.9633 | 214 | 0.1305 | - | - |
|
854 |
+
| 1.9725 | 215 | 0.1403 | - | - |
|
855 |
+
| 1.9817 | 216 | 0.1713 | - | - |
|
856 |
+
| 1.9908 | 217 | 0.1908 | - | - |
|
857 |
+
| 2.0 | 218 | 0.7015 | - | - |
|
858 |
+
| 2.0092 | 219 | 0.4075 | - | - |
|
859 |
+
| 2.0183 | 220 | 0.2799 | 0.0935 | 0.7097 |
|
860 |
+
| 2.0275 | 221 | 0.2448 | - | - |
|
861 |
+
| 2.0367 | 222 | 0.2071 | - | - |
|
862 |
+
| 2.0459 | 223 | 0.1706 | - | - |
|
863 |
+
| 2.0550 | 224 | 0.1503 | - | - |
|
864 |
+
| 2.0642 | 225 | 0.1375 | - | - |
|
865 |
+
| 2.0734 | 226 | 0.107 | - | - |
|
866 |
+
| 2.0826 | 227 | 0.1033 | - | - |
|
867 |
+
| 2.0917 | 228 | 0.1083 | - | - |
|
868 |
+
| 2.1009 | 229 | 0.1029 | - | - |
|
869 |
+
| 2.1101 | 230 | 0.0772 | - | - |
|
870 |
+
| 2.1193 | 231 | 0.0835 | - | - |
|
871 |
+
| 2.1284 | 232 | 0.0721 | - | - |
|
872 |
+
| 2.1376 | 233 | 0.067 | - | - |
|
873 |
+
| 2.1468 | 234 | 0.0682 | - | - |
|
874 |
+
| 2.1560 | 235 | 0.0687 | - | - |
|
875 |
+
| 2.1651 | 236 | 0.0447 | - | - |
|
876 |
+
| 2.1743 | 237 | 0.0476 | - | - |
|
877 |
+
| 2.1835 | 238 | 0.0434 | - | - |
|
878 |
+
| 2.1927 | 239 | 0.0351 | - | - |
|
879 |
+
| 2.2018 | 240 | 0.0371 | - | - |
|
880 |
+
| 2.2110 | 241 | 0.0289 | - | - |
|
881 |
+
| 2.2202 | 242 | 0.0277 | - | - |
|
882 |
+
| 2.2294 | 243 | 0.0261 | - | - |
|
883 |
+
| 2.2385 | 244 | 0.0146 | - | - |
|
884 |
+
| 2.2477 | 245 | 0.0135 | - | - |
|
885 |
+
| 2.2569 | 246 | 0.0188 | - | - |
|
886 |
+
| 2.2661 | 247 | 0.0156 | - | - |
|
887 |
+
| 2.2752 | 248 | 0.0134 | - | - |
|
888 |
+
| 2.2844 | 249 | 0.0134 | - | - |
|
889 |
+
| 2.2936 | 250 | 0.0147 | - | - |
|
890 |
+
| 2.3028 | 251 | 0.0147 | - | - |
|
891 |
+
| 2.3119 | 252 | 0.0114 | - | - |
|
892 |
+
| 2.3211 | 253 | 0.0139 | - | - |
|
893 |
+
| 2.3303 | 254 | 0.0109 | - | - |
|
894 |
+
| 2.3394 | 255 | 0.0113 | - | - |
|
895 |
+
| 2.3486 | 256 | 0.01 | - | - |
|
896 |
+
| 2.3578 | 257 | 0.0122 | - | - |
|
897 |
+
| 2.3670 | 258 | 0.0113 | - | - |
|
898 |
+
| 2.3761 | 259 | 0.0092 | - | - |
|
899 |
+
| 2.3853 | 260 | 0.0098 | - | - |
|
900 |
+
| 2.3945 | 261 | 0.0065 | - | - |
|
901 |
+
| 2.4037 | 262 | 0.0068 | - | - |
|
902 |
+
| 2.4128 | 263 | 0.007 | - | - |
|
903 |
+
| 2.4220 | 264 | 0.007 | - | - |
|
904 |
+
| 2.4312 | 265 | 0.0084 | - | - |
|
905 |
+
| 2.4404 | 266 | 0.009 | - | - |
|
906 |
+
| 2.4495 | 267 | 0.0066 | - | - |
|
907 |
+
| 2.4587 | 268 | 0.0073 | - | - |
|
908 |
+
| 2.4679 | 269 | 0.0065 | - | - |
|
909 |
+
| 2.4771 | 270 | 0.0085 | - | - |
|
910 |
+
| 2.4862 | 271 | 0.0068 | - | - |
|
911 |
+
| 2.4954 | 272 | 0.0072 | - | - |
|
912 |
+
| 2.5046 | 273 | 0.0071 | - | - |
|
913 |
+
| 2.5138 | 274 | 0.0067 | - | - |
|
914 |
+
| 2.5229 | 275 | 0.0066 | 0.1569 | 0.6882 |
|
915 |
+
| 2.5321 | 276 | 0.0073 | - | - |
|
916 |
+
| 2.5413 | 277 | 0.0071 | - | - |
|
917 |
+
| 2.5505 | 278 | 0.0068 | - | - |
|
918 |
+
| 2.5596 | 279 | 0.0072 | - | - |
|
919 |
+
| 2.5688 | 280 | 0.0066 | - | - |
|
920 |
+
| 2.5780 | 281 | 0.0081 | - | - |
|
921 |
+
| 2.5872 | 282 | 0.0075 | - | - |
|
922 |
+
| 2.5963 | 283 | 0.0086 | - | - |
|
923 |
+
| 2.6055 | 284 | 0.009 | - | - |
|
924 |
+
| 2.6147 | 285 | 0.0084 | - | - |
|
925 |
+
| 2.6239 | 286 | 0.0096 | - | - |
|
926 |
+
| 2.6330 | 287 | 0.0082 | - | - |
|
927 |
+
| 2.6422 | 288 | 0.0076 | - | - |
|
928 |
+
| 2.6514 | 289 | 0.0094 | - | - |
|
929 |
+
| 2.6606 | 290 | 0.0094 | - | - |
|
930 |
+
| 2.6697 | 291 | 0.0094 | - | - |
|
931 |
+
| 2.6789 | 292 | 0.0085 | - | - |
|
932 |
+
| 2.6881 | 293 | 0.0102 | - | - |
|
933 |
+
| 2.6972 | 294 | 0.0122 | - | - |
|
934 |
+
| 2.7064 | 295 | 0.0132 | - | - |
|
935 |
+
| 2.7156 | 296 | 0.0131 | - | - |
|
936 |
+
| 2.7248 | 297 | 0.0133 | - | - |
|
937 |
+
| 2.7339 | 298 | 0.013 | - | - |
|
938 |
+
| 2.7431 | 299 | 0.0158 | - | - |
|
939 |
+
| 2.7523 | 300 | 0.0144 | - | - |
|
940 |
+
| 2.7615 | 301 | 0.0161 | - | - |
|
941 |
+
| 2.7706 | 302 | 0.0153 | - | - |
|
942 |
+
| 2.7798 | 303 | 0.0167 | - | - |
|
943 |
+
| 2.7890 | 304 | 0.0179 | - | - |
|
944 |
+
| 2.7982 | 305 | 0.0176 | - | - |
|
945 |
+
| 2.8073 | 306 | 0.0178 | - | - |
|
946 |
+
| 2.8165 | 307 | 0.0179 | - | - |
|
947 |
+
| 2.8257 | 308 | 0.0209 | - | - |
|
948 |
+
| 2.8349 | 309 | 0.0194 | - | - |
|
949 |
+
| 2.8440 | 310 | 0.022 | - | - |
|
950 |
+
| 2.8532 | 311 | 0.0262 | - | - |
|
951 |
+
| 2.8624 | 312 | 0.0226 | - | - |
|
952 |
+
| 2.8716 | 313 | 0.0226 | - | - |
|
953 |
+
| 2.8807 | 314 | 0.0352 | - | - |
|
954 |
+
| 2.8899 | 315 | 0.0371 | - | - |
|
955 |
+
| 2.8991 | 316 | 0.0427 | - | - |
|
956 |
+
| 2.9083 | 317 | 0.0396 | - | - |
|
957 |
+
| 2.9174 | 318 | 0.052 | - | - |
|
958 |
+
| 2.9266 | 319 | 0.067 | - | - |
|
959 |
+
| 2.9358 | 320 | 0.0687 | - | - |
|
960 |
+
| 2.9450 | 321 | 0.0866 | - | - |
|
961 |
+
| 2.9541 | 322 | 0.0793 | - | - |
|
962 |
+
| 2.9633 | 323 | 0.0853 | - | - |
|
963 |
+
| 2.9725 | 324 | 0.0918 | - | - |
|
964 |
+
| 2.9817 | 325 | 0.1179 | - | - |
|
965 |
+
| 2.9908 | 326 | 0.1239 | - | - |
|
966 |
+
| 3.0 | 327 | 0.5811 | - | - |
|
967 |
+
|
968 |
+
</details>
|
969 |
+
|
970 |
+
### Framework Versions
|
971 |
+
- Python: 3.11.12
|
972 |
+
- Sentence Transformers: 5.0.0
|
973 |
+
- Transformers: 4.51.3
|
974 |
+
- PyTorch: 2.7.0+cu128
|
975 |
+
- Accelerate: 1.5.2
|
976 |
+
- Datasets: 2.21.0
|
977 |
+
- Tokenizers: 0.21.1
|
978 |
+
|
979 |
+
## Citation
|
980 |
+
|
981 |
+
### BibTeX
|
982 |
+
|
983 |
+
#### Sentence Transformers
|
984 |
+
```bibtex
|
985 |
+
@inproceedings{reimers-2019-sentence-bert,
|
986 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
987 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
988 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
989 |
+
month = "11",
|
990 |
+
year = "2019",
|
991 |
+
publisher = "Association for Computational Linguistics",
|
992 |
+
url = "https://arxiv.org/abs/1908.10084",
|
993 |
+
}
|
994 |
+
```
|
995 |
+
|
996 |
+
<!--
|
997 |
+
## Glossary
|
998 |
+
|
999 |
+
*Clearly define terms in order to be accessible across audiences.*
|
1000 |
+
-->
|
1001 |
+
|
1002 |
+
<!--
|
1003 |
+
## Model Card Authors
|
1004 |
+
|
1005 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
1006 |
+
-->
|
1007 |
+
|
1008 |
+
<!--
|
1009 |
+
## Model Card Contact
|
1010 |
+
|
1011 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
1012 |
+
-->
|
added_tokens.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</think>": 151668,
|
3 |
+
"</tool_call>": 151658,
|
4 |
+
"</tool_response>": 151666,
|
5 |
+
"<think>": 151667,
|
6 |
+
"<tool_call>": 151657,
|
7 |
+
"<tool_response>": 151665,
|
8 |
+
"<|box_end|>": 151649,
|
9 |
+
"<|box_start|>": 151648,
|
10 |
+
"<|endoftext|>": 151643,
|
11 |
+
"<|file_sep|>": 151664,
|
12 |
+
"<|fim_middle|>": 151660,
|
13 |
+
"<|fim_pad|>": 151662,
|
14 |
+
"<|fim_prefix|>": 151659,
|
15 |
+
"<|fim_suffix|>": 151661,
|
16 |
+
"<|im_end|>": 151645,
|
17 |
+
"<|im_start|>": 151644,
|
18 |
+
"<|image_pad|>": 151655,
|
19 |
+
"<|object_ref_end|>": 151647,
|
20 |
+
"<|object_ref_start|>": 151646,
|
21 |
+
"<|quad_end|>": 151651,
|
22 |
+
"<|quad_start|>": 151650,
|
23 |
+
"<|repo_name|>": 151663,
|
24 |
+
"<|video_pad|>": 151656,
|
25 |
+
"<|vision_end|>": 151653,
|
26 |
+
"<|vision_pad|>": 151654,
|
27 |
+
"<|vision_start|>": 151652
|
28 |
+
}
|
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen3Model"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"eos_token_id": 151645,
|
9 |
+
"head_dim": 128,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"max_position_embeddings": 40960,
|
15 |
+
"max_window_layers": 28,
|
16 |
+
"model_type": "qwen3",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 28,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"rms_norm_eps": 1e-06,
|
21 |
+
"rope_scaling": null,
|
22 |
+
"rope_theta": 1000000,
|
23 |
+
"sliding_window": null,
|
24 |
+
"tie_word_embeddings": true,
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.51.3",
|
27 |
+
"use_cache": true,
|
28 |
+
"use_sliding_window": false,
|
29 |
+
"vocab_size": 151936
|
30 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_type": "SentenceTransformer",
|
3 |
+
"__version__": {
|
4 |
+
"sentence_transformers": "5.0.0",
|
5 |
+
"transformers": "4.51.3",
|
6 |
+
"pytorch": "2.7.0+cu128"
|
7 |
+
},
|
8 |
+
"prompts": {
|
9 |
+
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
|
10 |
+
"document": ""
|
11 |
+
},
|
12 |
+
"default_prompt_name": null,
|
13 |
+
"similarity_fn_name": "cosine"
|
14 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:747defaa73c73ea09120679856f38c32a6f3b8db9a7358930e012f6d7a5c3e58
|
3 |
+
size 2384233112
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>",
|
5 |
+
"<|object_ref_start|>",
|
6 |
+
"<|object_ref_end|>",
|
7 |
+
"<|box_start|>",
|
8 |
+
"<|box_end|>",
|
9 |
+
"<|quad_start|>",
|
10 |
+
"<|quad_end|>",
|
11 |
+
"<|vision_start|>",
|
12 |
+
"<|vision_end|>",
|
13 |
+
"<|vision_pad|>",
|
14 |
+
"<|image_pad|>",
|
15 |
+
"<|video_pad|>"
|
16 |
+
],
|
17 |
+
"eos_token": {
|
18 |
+
"content": "<|im_end|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
},
|
24 |
+
"pad_token": {
|
25 |
+
"content": "<|endoftext|>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": false,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
}
|
31 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b703c7786128f3188e39ca909a734c4dde9b51819c66ca96dfacabd758c23e42
|
3 |
+
size 11423970
|
tokenizer_config.json
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"151643": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"151644": {
|
14 |
+
"content": "<|im_start|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"151645": {
|
22 |
+
"content": "<|im_end|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"151646": {
|
30 |
+
"content": "<|object_ref_start|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"151647": {
|
38 |
+
"content": "<|object_ref_end|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"151648": {
|
46 |
+
"content": "<|box_start|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"151649": {
|
54 |
+
"content": "<|box_end|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"151650": {
|
62 |
+
"content": "<|quad_start|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"151651": {
|
70 |
+
"content": "<|quad_end|>",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": false,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"151652": {
|
78 |
+
"content": "<|vision_start|>",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": false,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
},
|
85 |
+
"151653": {
|
86 |
+
"content": "<|vision_end|>",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": false,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": true
|
92 |
+
},
|
93 |
+
"151654": {
|
94 |
+
"content": "<|vision_pad|>",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": false,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": true
|
100 |
+
},
|
101 |
+
"151655": {
|
102 |
+
"content": "<|image_pad|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": true
|
108 |
+
},
|
109 |
+
"151656": {
|
110 |
+
"content": "<|video_pad|>",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": false,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": true
|
116 |
+
},
|
117 |
+
"151657": {
|
118 |
+
"content": "<tool_call>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"151658": {
|
126 |
+
"content": "</tool_call>",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": false,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"151659": {
|
134 |
+
"content": "<|fim_prefix|>",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": false,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"151660": {
|
142 |
+
"content": "<|fim_middle|>",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": false,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"151661": {
|
150 |
+
"content": "<|fim_suffix|>",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": false,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"151662": {
|
158 |
+
"content": "<|fim_pad|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"151663": {
|
166 |
+
"content": "<|repo_name|>",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": false,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"151664": {
|
174 |
+
"content": "<|file_sep|>",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": false,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
},
|
181 |
+
"151665": {
|
182 |
+
"content": "<tool_response>",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": false,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"151666": {
|
190 |
+
"content": "</tool_response>",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"151667": {
|
198 |
+
"content": "<think>",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": false,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": false
|
204 |
+
},
|
205 |
+
"151668": {
|
206 |
+
"content": "</think>",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": false
|
212 |
+
}
|
213 |
+
},
|
214 |
+
"additional_special_tokens": [
|
215 |
+
"<|im_start|>",
|
216 |
+
"<|im_end|>",
|
217 |
+
"<|object_ref_start|>",
|
218 |
+
"<|object_ref_end|>",
|
219 |
+
"<|box_start|>",
|
220 |
+
"<|box_end|>",
|
221 |
+
"<|quad_start|>",
|
222 |
+
"<|quad_end|>",
|
223 |
+
"<|vision_start|>",
|
224 |
+
"<|vision_end|>",
|
225 |
+
"<|vision_pad|>",
|
226 |
+
"<|image_pad|>",
|
227 |
+
"<|video_pad|>"
|
228 |
+
],
|
229 |
+
"bos_token": null,
|
230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
231 |
+
"clean_up_tokenization_spaces": false,
|
232 |
+
"eos_token": "<|im_end|>",
|
233 |
+
"errors": "replace",
|
234 |
+
"extra_special_tokens": {},
|
235 |
+
"max_length": 2048,
|
236 |
+
"model_max_length": 8192,
|
237 |
+
"pad_to_multiple_of": null,
|
238 |
+
"pad_token": "<|endoftext|>",
|
239 |
+
"pad_token_type_id": 0,
|
240 |
+
"padding_side": "right",
|
241 |
+
"split_special_tokens": false,
|
242 |
+
"stride": 0,
|
243 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
244 |
+
"truncation_side": "right",
|
245 |
+
"truncation_strategy": "longest_first",
|
246 |
+
"unk_token": null
|
247 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|