PurplelinkPL committed on
Commit
c464737
·
verified ·
1 Parent(s): 9750131

Upload 10 files

Browse files
config.json CHANGED
@@ -5,7 +5,7 @@
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "attention_probs_dropout_prob": 0.1,
8
- "attn_implementation": "sdpa",
9
  "bos_token_id": 50281,
10
  "classifier_activation": "gelu",
11
  "classifier_bias": false,
@@ -22,10 +22,10 @@
22
  "gradient_checkpointing": false,
23
  "hidden_activation": "gelu",
24
  "hidden_dropout_prob": 0.1,
25
- "hidden_size": 1024,
26
  "initializer_cutoff_factor": 2.0,
27
  "initializer_range": 0.02,
28
- "intermediate_size": 2624,
29
  "layer_norm_eps": 1e-05,
30
  "local_attention": 128,
31
  "local_rope_theta": 10000.0,
@@ -35,8 +35,8 @@
35
  "model_type": "modernbert",
36
  "norm_bias": false,
37
  "norm_eps": 1e-05,
38
- "num_attention_heads": 16,
39
- "num_hidden_layers": 28,
40
  "pad_token_id": 50283,
41
  "position_embedding_type": "absolute",
42
  "repad_logits_with_grad": false,
 
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "attention_probs_dropout_prob": 0.1,
8
+ "attn_implementation": "flash_attention_2",
9
  "bos_token_id": 50281,
10
  "classifier_activation": "gelu",
11
  "classifier_bias": false,
 
22
  "gradient_checkpointing": false,
23
  "hidden_activation": "gelu",
24
  "hidden_dropout_prob": 0.1,
25
+ "hidden_size": 768,
26
  "initializer_cutoff_factor": 2.0,
27
  "initializer_range": 0.02,
28
+ "intermediate_size": 1152,
29
  "layer_norm_eps": 1e-05,
30
  "local_attention": 128,
31
  "local_rope_theta": 10000.0,
 
35
  "model_type": "modernbert",
36
  "norm_bias": false,
37
  "norm_eps": 1e-05,
38
+ "num_attention_heads": 12,
39
+ "num_hidden_layers": 22,
40
  "pad_token_id": 50283,
41
  "position_embedding_type": "absolute",
42
  "repad_logits_with_grad": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d99e23c9c5e198f1a7197faedddc865f198f1ac2bcdc84e3402a78043d8ae5c8
3
- size 1583544840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68dfbe915ff4e03024cebbe33bde59cbf6b6d263e48d28395b6093519870427f
3
+ size 598635032
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89e76bdb45a3ed6a7ec73b67b37c49b640648f98ae23ad87e193ed93ffa5da0c
3
- size 3167201739
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c99ecdaaf664092be0234fe077bbcd25baa9813c62c8c46bdea2a42455c5ff
3
+ size 1197359627
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ce21f5865b864b77c3be4b62e9a259611aacea0d4451a245cb98c83253561d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c78b4c639ae6ded426a01aaa0cfe34a255d9fc38024fa012efae708fa63f88
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d0df6d48ac6c8d2a3fe965d9b7a645f9b425ec23c31765b3bbc57f64cf0fee9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d057880b7192dde278d129dfeefa0076ad8bd0f56219fa25a8eb938564ee0f19
3
  size 1465
tokenizer_config.json CHANGED
@@ -933,20 +933,13 @@
933
  "cls_token": "[CLS]",
934
  "extra_special_tokens": {},
935
  "mask_token": "[MASK]",
936
- "max_length": 512,
937
  "model_input_names": [
938
  "input_ids",
939
  "attention_mask"
940
  ],
941
  "model_max_length": 512,
942
- "pad_to_multiple_of": null,
943
  "pad_token": "[PAD]",
944
- "pad_token_type_id": 0,
945
- "padding_side": "right",
946
  "sep_token": "[SEP]",
947
- "stride": 0,
948
  "tokenizer_class": "PreTrainedTokenizerFast",
949
- "truncation_side": "right",
950
- "truncation_strategy": "longest_first",
951
  "unk_token": "[UNK]"
952
  }
 
933
  "cls_token": "[CLS]",
934
  "extra_special_tokens": {},
935
  "mask_token": "[MASK]",
 
936
  "model_input_names": [
937
  "input_ids",
938
  "attention_mask"
939
  ],
940
  "model_max_length": 512,
 
941
  "pad_token": "[PAD]",
 
 
942
  "sep_token": "[SEP]",
 
943
  "tokenizer_class": "PreTrainedTokenizerFast",
 
 
944
  "unk_token": "[UNK]"
945
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f45ebc3b5cfb179371730040033cc60cf9d8216007feceb0c4d7cbbf1cda1e6
3
- size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f786a1ab971b3519761e9e75ce2bb6dc37b3b2f73ad1120f8a4c1f996b3a44
3
+ size 5777