sileod committed on
Commit
a21a54c
·
verified ·
1 Parent(s): 4fefa8f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +100 -99
README.md CHANGED
@@ -20,109 +20,110 @@ tags:
20
  ModernBERT multi-task fine-tuned on tasksource NLI tasks (including MNLI, ANLI, SICK, WANLI, doc-nli, LingNLI, FOLIO, FOL-NLI, LogicNLI, Label-NLI and all datasets in the table below).
21
  This is the equivalent of an "instruct" version.
22
 
23
- Test accuracy at 10k training steps (current version, 100k steps incoming at the end of the week).
24
 
25
  | test_name | test_accuracy |
26
  |:-------------------------------------|----------------:|
27
- | glue/mnli | 0.82 |
28
- | glue/qnli | 0.84 |
29
- | glue/rte | 0.78 |
30
- | super_glue/cb | 0.75 |
31
- | anli/a1 | 0.51 |
32
- | anli/a2 | 0.39 |
33
- | anli/a3 | 0.38 |
34
- | sick/label | 0.91 |
35
- | sick/entailment_AB | 0.81 |
36
- | snli | 0.82 |
37
- | scitail/snli_format | 0.94 |
38
- | hans | 0.99 |
39
- | WANLI | 0.7 |
40
- | recast/recast_ner | 0.84 |
41
- | recast/recast_kg_relations | 0.89 |
42
- | recast/recast_puns | 0.78 |
43
- | recast/recast_verbcorner | 0.87 |
44
- | recast/recast_sentiment | 0.97 |
45
- | recast/recast_verbnet | 0.74 |
46
- | recast/recast_factuality | 0.88 |
47
- | recast/recast_megaveridicality | 0.86 |
48
- | probability_words_nli/reasoning_2hop | 0.76 |
49
- | probability_words_nli/reasoning_1hop | 0.84 |
50
- | probability_words_nli/usnli | 0.7 |
51
- | nan-nli | 0.62 |
52
- | nli_fever | 0.71 |
53
- | breaking_nli | 0.98 |
54
- | conj_nli | 0.66 |
55
- | fracas | 0 |
56
- | dialogue_nli | 0.84 |
57
- | mpe | 0.69 |
58
- | dnc | 0.81 |
59
- | recast_white/fnplus | 0.6 |
60
- | recast_white/sprl | 0.83 |
61
- | recast_white/dpr | 0.57 |
62
- | robust_nli/IS_CS | 0.45 |
63
- | robust_nli/LI_LI | 0.92 |
64
- | robust_nli/ST_WO | 0.66 |
65
- | robust_nli/PI_SP | 0.53 |
66
- | robust_nli/PI_CD | 0.54 |
67
- | robust_nli/ST_SE | 0.58 |
68
- | robust_nli/ST_NE | 0.52 |
69
- | robust_nli/ST_LM | 0.47 |
70
- | robust_nli_is_sd | 0.99 |
71
- | robust_nli_li_ts | 0.81 |
72
- | add_one_rte | 0.87 |
73
- | cycic_classification | 0.62 |
74
- | lingnli | 0.73 |
75
- | monotonicity-entailment | 0.84 |
76
- | scinli | 0.65 |
77
- | naturallogic | 0.77 |
78
- | syntactic-augmentation-nli | 0.87 |
79
- | autotnli | 0.83 |
80
- | defeasible-nli/atomic | 0.72 |
81
- | defeasible-nli/snli | 0.67 |
82
- | help-nli | 0.72 |
83
- | nli-veridicality-transitivity | 0.92 |
84
- | lonli | 0.88 |
85
- | dadc-limit-nli | 0.59 |
86
- | folio | 0.44 |
87
- | tomi-nli | 0.52 |
88
- | temporal-nli | 0.62 |
89
- | counterfactually-augmented-snli | 0.69 |
90
- | cnli | 0.71 |
91
- | logiqa-2.0-nli | 0.51 |
92
- | mindgames | 0.83 |
93
- | ConTRoL-nli | 0.49 |
94
- | logical-fallacy | 0.13 |
95
- | conceptrules_v2 | 0.97 |
96
- | zero-shot-label-nli | 0.67 |
97
- | scone | 0.79 |
98
- | monli | 0.76 |
99
- | SpaceNLI | 0.89 |
100
- | propsegment/nli | 0.82 |
101
- | SDOH-NLI | 0.98 |
102
- | scifact_entailment | 0.52 |
103
- | AdjectiveScaleProbe-nli | 0.91 |
104
- | resnli | 0.97 |
105
- | semantic_fragments_nli | 0.91 |
106
- | dataset_train_nli | 0.81 |
107
- | ruletaker | 0.69 |
108
  | PARARULE-Plus | 1 |
109
- | logical-entailment | 0.53 |
110
- | nope | 0.36 |
111
- | LogicNLI | 0.34 |
112
- | contract-nli/contractnli_a/seg | 0.79 |
113
- | contract-nli/contractnli_b/full | 0.67 |
114
- | nli4ct_semeval2024 | 0.53 |
115
- | biosift-nli | 0.85 |
116
- | SIGA-nli | 0.46 |
117
- | FOL-nli | 0.49 |
118
- | doc-nli | 0.81 |
119
- | mctest-nli | 0.84 |
120
- | idioms-nli | 0.77 |
121
- | lifecycle-entailment | 0.57 |
122
- | MSciNLI | 0.65 |
123
- | babi_nli | 0.77 |
124
- | gen_debiased_nli | 0.82 |
125
-
 
126
 
127
  # Usage
128
 
 
20
  ModernBERT multi-task fine-tuned on tasksource NLI tasks (including MNLI, ANLI, SICK, WANLI, doc-nli, LingNLI, FOLIO, FOL-NLI, LogicNLI, Label-NLI and all datasets in the table below).
21
  This is the equivalent of an "instruct" version.
22
 
23
+ Test accuracy at 100k training steps. A 250k-step version is coming around 25 December.
24
 
25
  | test_name | test_accuracy |
26
  |:-------------------------------------|----------------:|
27
+ | glue/mnli | 0.91 |
28
+ | glue/qnli | 0.93 |
29
+ | glue/rte | 0.86 |
30
+ | super_glue/cb | 0.89 |
31
+ | anli/a1 | 0.62 |
32
+ | anli/a2 | 0.47 |
33
+ | anli/a3 | 0.42 |
34
+ | sick/label | 0.92 |
35
+ | sick/entailment_AB | 0.84 |
36
+ | snli | 0.91 |
37
+ | scitail/snli_format | 0.95 |
38
+ | hans | 1 |
39
+ | WANLI | 0.71 |
40
+ | recast/recast_sentiment | 0.98 |
41
+ | recast/recast_verbcorner | 0.94 |
42
+ | recast/recast_ner | 0.87 |
43
+ | recast/recast_factuality | 0.93 |
44
+ | recast/recast_puns | 0.93 |
45
+ | recast/recast_kg_relations | 0.94 |
46
+ | recast/recast_verbnet | 0.88 |
47
+ | recast/recast_megaveridicality | 0.87 |
48
+ | probability_words_nli/usnli | 0.77 |
49
+ | probability_words_nli/reasoning_1hop | 0.99 |
50
+ | probability_words_nli/reasoning_2hop | 0.9 |
51
+ | nan-nli | 0.85 |
52
+ | nli_fever | 0.72 |
53
+ | breaking_nli | 1 |
54
+ | conj_nli | 0.71 |
55
+ | fracas | 0.86 |
56
+ | dialogue_nli | 0.88 |
57
+ | mpe | 0.73 |
58
+ | dnc | 0.9 |
59
+ | recast_white/fnplus | 0.81 |
60
+ | recast_white/sprl | 0.92 |
61
+ | recast_white/dpr | 0.61 |
62
+ | robust_nli/IS_CS | 0.76 |
63
+ | robust_nli/LI_LI | 0.98 |
64
+ | robust_nli/ST_WO | 0.85 |
65
+ | robust_nli/PI_SP | 0.74 |
66
+ | robust_nli/PI_CD | 0.8 |
67
+ | robust_nli/ST_SE | 0.78 |
68
+ | robust_nli/ST_NE | 0.86 |
69
+ | robust_nli/ST_LM | 0.81 |
70
+ | robust_nli_is_sd | 1 |
71
+ | robust_nli_li_ts | 0.91 |
72
+ | add_one_rte | 0.91 |
73
+ | cycic_classification | 0.83 |
74
+ | lingnli | 0.82 |
75
+ | monotonicity-entailment | 0.95 |
76
+ | scinli | 0.79 |
77
+ | naturallogic | 0.91 |
78
+ | syntactic-augmentation-nli | 0.95 |
79
+ | autotnli | 0.92 |
80
+ | defeasible-nli/atomic | 0.76 |
81
+ | defeasible-nli/snli | 0.79 |
82
+ | help-nli | 0.91 |
83
+ | nli-veridicality-transitivity | 0.99 |
84
+ | lonli | 0.99 |
85
+ | dadc-limit-nli | 0.67 |
86
+ | folio | 0.59 |
87
+ | tomi-nli | 0.53 |
88
+ | temporal-nli | 0.92 |
89
+ | counterfactually-augmented-snli | 0.74 |
90
+ | cnli | 0.81 |
91
+ | logiqa-2.0-nli | 0.57 |
92
+ | mindgames | 0.94 |
93
+ | ConTRoL-nli | 0.65 |
94
+ | logical-fallacy | 0.31 |
95
+ | conceptrules_v2 | 0.99 |
96
+ | zero-shot-label-nli | 0.74 |
97
+ | scone | 0.97 |
98
+ | monli | 0.98 |
99
+ | SpaceNLI | 1 |
100
+ | propsegment/nli | 0.91 |
101
+ | SDOH-NLI | 1 |
102
+ | scifact_entailment | 0.78 |
103
+ | AdjectiveScaleProbe-nli | 0.99 |
104
+ | resnli | 0.99 |
105
+ | semantic_fragments_nli | 0.99 |
106
+ | dataset_train_nli | 0.88 |
107
+ | ruletaker | 0.91 |
108
  | PARARULE-Plus | 1 |
109
+ | logical-entailment | 0.73 |
110
+ | nope | 0.54 |
111
+ | LogicNLI | 0.65 |
112
+ | contract-nli/contractnli_a/seg | 0.87 |
113
+ | contract-nli/contractnli_b/full | 0.78 |
114
+ | nli4ct_semeval2024 | 0.6 |
115
+ | biosift-nli | 0.88 |
116
+ | SIGA-nli | 0.54 |
117
+ | FOL-nli | 0.71 |
118
+ | doc-nli | 0.82 |
119
+ | mctest-nli | 0.89 |
120
+ | idioms-nli | 0.86 |
121
+ | lifecycle-entailment | 0.71 |
122
+ | MSciNLI | 0.82 |
123
+ | hover-3way/nli | 0.9 |
124
+ | seahorse_summarization_evaluation | 0.82 |
125
+ | babi_nli | 0.94 |
126
+ | gen_debiased_nli | 0.9 |
127
 
128
  # Usage
129