Upload 10 files

Files changed (8)

config.json CHANGED Viewed

@@ -5,7 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "attention_probs_dropout_prob": 0.1,
-  "attn_implementation": "sdpa",
   "bos_token_id": 50281,
   "classifier_activation": "gelu",
   "classifier_bias": false,
@@ -22,10 +22,10 @@
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 1024,
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 2624,
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
@@ -35,8 +35,8 @@
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
-  "num_attention_heads": 16,
-  "num_hidden_layers": 28,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "repad_logits_with_grad": false,

   "attention_bias": false,
   "attention_dropout": 0.0,
   "attention_probs_dropout_prob": 0.1,
+  "attn_implementation": "flash_attention_2",
   "bos_token_id": 50281,
   "classifier_activation": "gelu",
   "classifier_bias": false,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
+  "intermediate_size": 1152,
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "repad_logits_with_grad": false,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d99e23c9c5e198f1a7197faedddc865f198f1ac2bcdc84e3402a78043d8ae5c8
-size 1583544840

 version https://git-lfs.github.com/spec/v1
+oid sha256:68dfbe915ff4e03024cebbe33bde59cbf6b6d263e48d28395b6093519870427f
+size 598635032

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89e76bdb45a3ed6a7ec73b67b37c49b640648f98ae23ad87e193ed93ffa5da0c
-size 3167201739

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7c99ecdaaf664092be0234fe077bbcd25baa9813c62c8c46bdea2a42455c5ff
+size 1197359627

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73ce21f5865b864b77c3be4b62e9a259611aacea0d4451a245cb98c83253561d
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:08c78b4c639ae6ded426a01aaa0cfe34a255d9fc38024fa012efae708fa63f88
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d0df6d48ac6c8d2a3fe965d9b7a645f9b425ec23c31765b3bbc57f64cf0fee9
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:d057880b7192dde278d129dfeefa0076ad8bd0f56219fa25a8eb938564ee0f19
 size 1465

tokenizer_config.json CHANGED Viewed

@@ -933,20 +933,13 @@
   "cls_token": "[CLS]",
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
-  "max_length": 512,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

   "cls_token": "[CLS]",
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]"
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See the raw diff.

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f45ebc3b5cfb179371730040033cc60cf9d8216007feceb0c4d7cbbf1cda1e6
-size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4f786a1ab971b3519761e9e75ce2bb6dc37b3b2f73ad1120f8a4c1f996b3a44
+size 5777