Upload 10 files
Browse files
- config.json +5 -5
- model.safetensors +2 -2
- optimizer.pt +2 -2
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- tokenizer_config.json +0 -7
- trainer_state.json +0 -0
- training_args.bin +2 -2
config.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
"attention_probs_dropout_prob": 0.1,
|
| 8 |
-
"attn_implementation": "
|
| 9 |
"bos_token_id": 50281,
|
| 10 |
"classifier_activation": "gelu",
|
| 11 |
"classifier_bias": false,
|
|
@@ -22,10 +22,10 @@
|
|
| 22 |
"gradient_checkpointing": false,
|
| 23 |
"hidden_activation": "gelu",
|
| 24 |
"hidden_dropout_prob": 0.1,
|
| 25 |
-
"hidden_size":
|
| 26 |
"initializer_cutoff_factor": 2.0,
|
| 27 |
"initializer_range": 0.02,
|
| 28 |
-
"intermediate_size":
|
| 29 |
"layer_norm_eps": 1e-05,
|
| 30 |
"local_attention": 128,
|
| 31 |
"local_rope_theta": 10000.0,
|
|
@@ -35,8 +35,8 @@
|
|
| 35 |
"model_type": "modernbert",
|
| 36 |
"norm_bias": false,
|
| 37 |
"norm_eps": 1e-05,
|
| 38 |
-
"num_attention_heads":
|
| 39 |
-
"num_hidden_layers":
|
| 40 |
"pad_token_id": 50283,
|
| 41 |
"position_embedding_type": "absolute",
|
| 42 |
"repad_logits_with_grad": false,
|
|
|
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
"attention_probs_dropout_prob": 0.1,
|
| 8 |
+
"attn_implementation": "flash_attention_2",
|
| 9 |
"bos_token_id": 50281,
|
| 10 |
"classifier_activation": "gelu",
|
| 11 |
"classifier_bias": false,
|
|
|
|
| 22 |
"gradient_checkpointing": false,
|
| 23 |
"hidden_activation": "gelu",
|
| 24 |
"hidden_dropout_prob": 0.1,
|
| 25 |
+
"hidden_size": 768,
|
| 26 |
"initializer_cutoff_factor": 2.0,
|
| 27 |
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 1152,
|
| 29 |
"layer_norm_eps": 1e-05,
|
| 30 |
"local_attention": 128,
|
| 31 |
"local_rope_theta": 10000.0,
|
|
|
|
| 35 |
"model_type": "modernbert",
|
| 36 |
"norm_bias": false,
|
| 37 |
"norm_eps": 1e-05,
|
| 38 |
+
"num_attention_heads": 12,
|
| 39 |
+
"num_hidden_layers": 22,
|
| 40 |
"pad_token_id": 50283,
|
| 41 |
"position_embedding_type": "absolute",
|
| 42 |
"repad_logits_with_grad": false,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68dfbe915ff4e03024cebbe33bde59cbf6b6d263e48d28395b6093519870427f
|
| 3 |
+
size 598635032
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7c99ecdaaf664092be0234fe077bbcd25baa9813c62c8c46bdea2a42455c5ff
|
| 3 |
+
size 1197359627
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08c78b4c639ae6ded426a01aaa0cfe34a255d9fc38024fa012efae708fa63f88
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d057880b7192dde278d129dfeefa0076ad8bd0f56219fa25a8eb938564ee0f19
|
| 3 |
size 1465
|
tokenizer_config.json
CHANGED
|
@@ -933,20 +933,13 @@
|
|
| 933 |
"cls_token": "[CLS]",
|
| 934 |
"extra_special_tokens": {},
|
| 935 |
"mask_token": "[MASK]",
|
| 936 |
-
"max_length": 512,
|
| 937 |
"model_input_names": [
|
| 938 |
"input_ids",
|
| 939 |
"attention_mask"
|
| 940 |
],
|
| 941 |
"model_max_length": 512,
|
| 942 |
-
"pad_to_multiple_of": null,
|
| 943 |
"pad_token": "[PAD]",
|
| 944 |
-
"pad_token_type_id": 0,
|
| 945 |
-
"padding_side": "right",
|
| 946 |
"sep_token": "[SEP]",
|
| 947 |
-
"stride": 0,
|
| 948 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 949 |
-
"truncation_side": "right",
|
| 950 |
-
"truncation_strategy": "longest_first",
|
| 951 |
"unk_token": "[UNK]"
|
| 952 |
}
|
|
|
|
| 933 |
"cls_token": "[CLS]",
|
| 934 |
"extra_special_tokens": {},
|
| 935 |
"mask_token": "[MASK]",
|
|
|
|
| 936 |
"model_input_names": [
|
| 937 |
"input_ids",
|
| 938 |
"attention_mask"
|
| 939 |
],
|
| 940 |
"model_max_length": 512,
|
|
|
|
| 941 |
"pad_token": "[PAD]",
|
|
|
|
|
|
|
| 942 |
"sep_token": "[SEP]",
|
|
|
|
| 943 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
|
|
|
|
|
|
| 944 |
"unk_token": "[UNK]"
|
| 945 |
}
|
trainer_state.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4f786a1ab971b3519761e9e75ce2bb6dc37b3b2f73ad1120f8a4c1f996b3a44
|
| 3 |
+
size 5777
|