imdatta0 committed
Commit f045612 · verified · 1 Parent(s): 5762b49

Upload GptOssForCausalLM

model-00001-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dfc746024f0959f61506acee7e68b73c08abd95086320ca0cf9a00eaad5f8c8
-size 4968986544
+oid sha256:7405ebb3dba293e987385b856dc066e0f54864ada27c6c9dedde82cc8052e9ab
+size 4968986592
model-00002-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05f4974c3d0b34ad4a65c9e69943657622038bf3b0295b1ec77378830ae87edc
-size 4972362872
+oid sha256:b502e596b87ca4b0b863803100aad4daad937d58b968260c7b0683fe03952703
+size 4972362912
model-00003-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ea6a0d0fd8e16e7fa65ee3fbb9fab5464dac005c3e55a046dcc447b4a0f8d32
-size 4972362872
+oid sha256:bace628356d060992f000fef2d19e6ce79e25fb61cd5b37a65443a1a5f9a975c
+size 4972362912
model-00004-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b3a2ca1d60ed8d01021cba531b0907b924371ab0ad105bf0c05696867687dd7
-size 4972363056
+oid sha256:d0c19d33dabbf18a9cead94d4591cafde66159fbc2a9b3c537841cf0b2ba06fe
+size 4972363096
model-00005-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ea5d0168556893d32fd3b1bd898fe5dd7f6da39795a982cc4ca32db1c023299
-size 4972363296
+oid sha256:eb1ed1de1be0829515b135010615bd5d7f619cd63a9414b9b6195911012a799a
+size 4972363336
model-00006-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e15caab3bf704743d5af3050a83cb105a96cd515bf75f3e0824dca038cf3c94
-size 4972363296
+oid sha256:839812f041f34cb0e9e7b2d997da1bdd7831fe62b9ccbea33176d5847b3a232e
+size 4972363336
model-00007-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:200657a21c776d0257b7b1e87c9ef63fad5747c97943fb7b8d8e3b8fec8d4d6f
-size 4972363288
+oid sha256:7f184d228be8d23bb21749021752120c04308a306e54660435454325567cd422
+size 4972363328
model-00008-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ba35a9058353ac6344d56af61c28112934afd01f5e4183f0ce1159889da6d47
-size 4972363288
+oid sha256:9887e040dd583f8f78ff2e0e7ad238b8bb36c1f80633bb09e15d12ea37e0c2a2
+size 4972363328
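
All eight shard diffs above touch only Git LFS pointer files: three lines giving the spec version, the sha256 oid of the real payload, and its byte size. As a minimal sketch (this helper is illustrative, not part of the commit or of any LFS tooling), a downloaded shard could be checked against its new pointer like so:

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, local_file: Path) -> bool:
    # Parse the two data lines of the pointer: "oid sha256:<hex>" and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines()[1:])
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Cheap check first: the byte size must match the pointer exactly.
    if local_file.stat().st_size != expected_size:
        return False

    # Stream the file (shards here are ~5 GB) and compare the sha256 digest.
    digest = hashlib.sha256()
    with local_file.open("rb") as f:
        while chunk := f.read(1 << 20):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

For model-00001, for example, the new pointer expects size 4968986592 and oid 7405ebb3….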
model.safetensors.index.json CHANGED
@@ -135,8 +135,8 @@
     "model.layers.0.mlp.experts.gate_up_projs.8.weight": "model-00001-of-00009.safetensors",
     "model.layers.0.mlp.experts.gate_up_projs.9.bias": "model-00001-of-00009.safetensors",
     "model.layers.0.mlp.experts.gate_up_projs.9.weight": "model-00001-of-00009.safetensors",
-    "model.layers.0.mlp.router.bias": "model-00001-of-00009.safetensors",
-    "model.layers.0.mlp.router.weight": "model-00001-of-00009.safetensors",
+    "model.layers.0.mlp.router.linear.bias": "model-00001-of-00009.safetensors",
+    "model.layers.0.mlp.router.linear.weight": "model-00001-of-00009.safetensors",
     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
     "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00009.safetensors",
     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
@@ -276,8 +276,8 @@
     "model.layers.1.mlp.experts.gate_up_projs.8.weight": "model-00001-of-00009.safetensors",
     "model.layers.1.mlp.experts.gate_up_projs.9.bias": "model-00001-of-00009.safetensors",
     "model.layers.1.mlp.experts.gate_up_projs.9.weight": "model-00001-of-00009.safetensors",
-    "model.layers.1.mlp.router.bias": "model-00001-of-00009.safetensors",
-    "model.layers.1.mlp.router.weight": "model-00001-of-00009.safetensors",
+    "model.layers.1.mlp.router.linear.bias": "model-00001-of-00009.safetensors",
+    "model.layers.1.mlp.router.linear.weight": "model-00001-of-00009.safetensors",
     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
     "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00009.safetensors",
     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
@@ -417,8 +417,8 @@
     "model.layers.10.mlp.experts.gate_up_projs.8.weight": "model-00004-of-00009.safetensors",
     "model.layers.10.mlp.experts.gate_up_projs.9.bias": "model-00004-of-00009.safetensors",
     "model.layers.10.mlp.experts.gate_up_projs.9.weight": "model-00004-of-00009.safetensors",
-    "model.layers.10.mlp.router.bias": "model-00004-of-00009.safetensors",
-    "model.layers.10.mlp.router.weight": "model-00004-of-00009.safetensors",
+    "model.layers.10.mlp.router.linear.bias": "model-00004-of-00009.safetensors",
+    "model.layers.10.mlp.router.linear.weight": "model-00004-of-00009.safetensors",
     "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
     "model.layers.10.self_attn.k_proj.bias": "model-00004-of-00009.safetensors",
     "model.layers.10.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
@@ -558,8 +558,8 @@
     "model.layers.11.mlp.experts.gate_up_projs.8.weight": "model-00004-of-00009.safetensors",
     "model.layers.11.mlp.experts.gate_up_projs.9.bias": "model-00004-of-00009.safetensors",
     "model.layers.11.mlp.experts.gate_up_projs.9.weight": "model-00004-of-00009.safetensors",
-    "model.layers.11.mlp.router.bias": "model-00004-of-00009.safetensors",
-    "model.layers.11.mlp.router.weight": "model-00004-of-00009.safetensors",
+    "model.layers.11.mlp.router.linear.bias": "model-00004-of-00009.safetensors",
+    "model.layers.11.mlp.router.linear.weight": "model-00004-of-00009.safetensors",
     "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
     "model.layers.11.self_attn.k_proj.bias": "model-00004-of-00009.safetensors",
     "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
@@ -699,8 +699,8 @@
     "model.layers.12.mlp.experts.gate_up_projs.8.weight": "model-00005-of-00009.safetensors",
     "model.layers.12.mlp.experts.gate_up_projs.9.bias": "model-00005-of-00009.safetensors",
     "model.layers.12.mlp.experts.gate_up_projs.9.weight": "model-00005-of-00009.safetensors",
-    "model.layers.12.mlp.router.bias": "model-00005-of-00009.safetensors",
-    "model.layers.12.mlp.router.weight": "model-00005-of-00009.safetensors",
+    "model.layers.12.mlp.router.linear.bias": "model-00005-of-00009.safetensors",
+    "model.layers.12.mlp.router.linear.weight": "model-00005-of-00009.safetensors",
     "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
     "model.layers.12.self_attn.k_proj.bias": "model-00005-of-00009.safetensors",
     "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
@@ -840,8 +840,8 @@
     "model.layers.13.mlp.experts.gate_up_projs.8.weight": "model-00005-of-00009.safetensors",
     "model.layers.13.mlp.experts.gate_up_projs.9.bias": "model-00005-of-00009.safetensors",
     "model.layers.13.mlp.experts.gate_up_projs.9.weight": "model-00005-of-00009.safetensors",
-    "model.layers.13.mlp.router.bias": "model-00005-of-00009.safetensors",
-    "model.layers.13.mlp.router.weight": "model-00005-of-00009.safetensors",
+    "model.layers.13.mlp.router.linear.bias": "model-00005-of-00009.safetensors",
+    "model.layers.13.mlp.router.linear.weight": "model-00005-of-00009.safetensors",
     "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
     "model.layers.13.self_attn.k_proj.bias": "model-00005-of-00009.safetensors",
     "model.layers.13.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
@@ -981,8 +981,8 @@
     "model.layers.14.mlp.experts.gate_up_projs.8.weight": "model-00005-of-00009.safetensors",
     "model.layers.14.mlp.experts.gate_up_projs.9.bias": "model-00005-of-00009.safetensors",
     "model.layers.14.mlp.experts.gate_up_projs.9.weight": "model-00005-of-00009.safetensors",
-    "model.layers.14.mlp.router.bias": "model-00005-of-00009.safetensors",
-    "model.layers.14.mlp.router.weight": "model-00005-of-00009.safetensors",
+    "model.layers.14.mlp.router.linear.bias": "model-00005-of-00009.safetensors",
+    "model.layers.14.mlp.router.linear.weight": "model-00005-of-00009.safetensors",
     "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
     "model.layers.14.self_attn.k_proj.bias": "model-00005-of-00009.safetensors",
     "model.layers.14.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
@@ -1122,8 +1122,8 @@
     "model.layers.15.mlp.experts.gate_up_projs.8.weight": "model-00006-of-00009.safetensors",
     "model.layers.15.mlp.experts.gate_up_projs.9.bias": "model-00006-of-00009.safetensors",
     "model.layers.15.mlp.experts.gate_up_projs.9.weight": "model-00006-of-00009.safetensors",
-    "model.layers.15.mlp.router.bias": "model-00006-of-00009.safetensors",
-    "model.layers.15.mlp.router.weight": "model-00006-of-00009.safetensors",
+    "model.layers.15.mlp.router.linear.bias": "model-00006-of-00009.safetensors",
+    "model.layers.15.mlp.router.linear.weight": "model-00006-of-00009.safetensors",
     "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
     "model.layers.15.self_attn.k_proj.bias": "model-00006-of-00009.safetensors",
     "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
@@ -1263,8 +1263,8 @@
     "model.layers.16.mlp.experts.gate_up_projs.8.weight": "model-00006-of-00009.safetensors",
     "model.layers.16.mlp.experts.gate_up_projs.9.bias": "model-00006-of-00009.safetensors",
     "model.layers.16.mlp.experts.gate_up_projs.9.weight": "model-00006-of-00009.safetensors",
-    "model.layers.16.mlp.router.bias": "model-00006-of-00009.safetensors",
-    "model.layers.16.mlp.router.weight": "model-00006-of-00009.safetensors",
+    "model.layers.16.mlp.router.linear.bias": "model-00006-of-00009.safetensors",
+    "model.layers.16.mlp.router.linear.weight": "model-00006-of-00009.safetensors",
     "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
     "model.layers.16.self_attn.k_proj.bias": "model-00006-of-00009.safetensors",
     "model.layers.16.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
@@ -1404,8 +1404,8 @@
     "model.layers.17.mlp.experts.gate_up_projs.8.weight": "model-00006-of-00009.safetensors",
     "model.layers.17.mlp.experts.gate_up_projs.9.bias": "model-00006-of-00009.safetensors",
     "model.layers.17.mlp.experts.gate_up_projs.9.weight": "model-00006-of-00009.safetensors",
-    "model.layers.17.mlp.router.bias": "model-00006-of-00009.safetensors",
-    "model.layers.17.mlp.router.weight": "model-00006-of-00009.safetensors",
+    "model.layers.17.mlp.router.linear.bias": "model-00006-of-00009.safetensors",
+    "model.layers.17.mlp.router.linear.weight": "model-00006-of-00009.safetensors",
     "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
     "model.layers.17.self_attn.k_proj.bias": "model-00006-of-00009.safetensors",
     "model.layers.17.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
@@ -1545,8 +1545,8 @@
     "model.layers.18.mlp.experts.gate_up_projs.8.weight": "model-00007-of-00009.safetensors",
     "model.layers.18.mlp.experts.gate_up_projs.9.bias": "model-00007-of-00009.safetensors",
     "model.layers.18.mlp.experts.gate_up_projs.9.weight": "model-00007-of-00009.safetensors",
-    "model.layers.18.mlp.router.bias": "model-00007-of-00009.safetensors",
-    "model.layers.18.mlp.router.weight": "model-00007-of-00009.safetensors",
+    "model.layers.18.mlp.router.linear.bias": "model-00007-of-00009.safetensors",
+    "model.layers.18.mlp.router.linear.weight": "model-00007-of-00009.safetensors",
     "model.layers.18.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
     "model.layers.18.self_attn.k_proj.bias": "model-00007-of-00009.safetensors",
     "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
@@ -1686,8 +1686,8 @@
     "model.layers.19.mlp.experts.gate_up_projs.8.weight": "model-00007-of-00009.safetensors",
     "model.layers.19.mlp.experts.gate_up_projs.9.bias": "model-00007-of-00009.safetensors",
     "model.layers.19.mlp.experts.gate_up_projs.9.weight": "model-00007-of-00009.safetensors",
-    "model.layers.19.mlp.router.bias": "model-00007-of-00009.safetensors",
-    "model.layers.19.mlp.router.weight": "model-00007-of-00009.safetensors",
+    "model.layers.19.mlp.router.linear.bias": "model-00007-of-00009.safetensors",
+    "model.layers.19.mlp.router.linear.weight": "model-00007-of-00009.safetensors",
     "model.layers.19.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
     "model.layers.19.self_attn.k_proj.bias": "model-00007-of-00009.safetensors",
     "model.layers.19.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
@@ -1827,8 +1827,8 @@
     "model.layers.2.mlp.experts.gate_up_projs.8.weight": "model-00001-of-00009.safetensors",
     "model.layers.2.mlp.experts.gate_up_projs.9.bias": "model-00001-of-00009.safetensors",
     "model.layers.2.mlp.experts.gate_up_projs.9.weight": "model-00001-of-00009.safetensors",
-    "model.layers.2.mlp.router.bias": "model-00001-of-00009.safetensors",
-    "model.layers.2.mlp.router.weight": "model-00001-of-00009.safetensors",
+    "model.layers.2.mlp.router.linear.bias": "model-00001-of-00009.safetensors",
+    "model.layers.2.mlp.router.linear.weight": "model-00001-of-00009.safetensors",
     "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
     "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00009.safetensors",
     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
@@ -1968,8 +1968,8 @@
     "model.layers.20.mlp.experts.gate_up_projs.8.weight": "model-00007-of-00009.safetensors",
     "model.layers.20.mlp.experts.gate_up_projs.9.bias": "model-00007-of-00009.safetensors",
     "model.layers.20.mlp.experts.gate_up_projs.9.weight": "model-00007-of-00009.safetensors",
-    "model.layers.20.mlp.router.bias": "model-00007-of-00009.safetensors",
-    "model.layers.20.mlp.router.weight": "model-00007-of-00009.safetensors",
+    "model.layers.20.mlp.router.linear.bias": "model-00007-of-00009.safetensors",
+    "model.layers.20.mlp.router.linear.weight": "model-00007-of-00009.safetensors",
     "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
     "model.layers.20.self_attn.k_proj.bias": "model-00007-of-00009.safetensors",
     "model.layers.20.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
@@ -2109,8 +2109,8 @@
     "model.layers.21.mlp.experts.gate_up_projs.8.weight": "model-00008-of-00009.safetensors",
     "model.layers.21.mlp.experts.gate_up_projs.9.bias": "model-00008-of-00009.safetensors",
     "model.layers.21.mlp.experts.gate_up_projs.9.weight": "model-00008-of-00009.safetensors",
-    "model.layers.21.mlp.router.bias": "model-00008-of-00009.safetensors",
-    "model.layers.21.mlp.router.weight": "model-00008-of-00009.safetensors",
+    "model.layers.21.mlp.router.linear.bias": "model-00008-of-00009.safetensors",
+    "model.layers.21.mlp.router.linear.weight": "model-00008-of-00009.safetensors",
     "model.layers.21.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
     "model.layers.21.self_attn.k_proj.bias": "model-00008-of-00009.safetensors",
     "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
@@ -2250,8 +2250,8 @@
     "model.layers.22.mlp.experts.gate_up_projs.8.weight": "model-00008-of-00009.safetensors",
     "model.layers.22.mlp.experts.gate_up_projs.9.bias": "model-00008-of-00009.safetensors",
     "model.layers.22.mlp.experts.gate_up_projs.9.weight": "model-00008-of-00009.safetensors",
-    "model.layers.22.mlp.router.bias": "model-00008-of-00009.safetensors",
-    "model.layers.22.mlp.router.weight": "model-00008-of-00009.safetensors",
+    "model.layers.22.mlp.router.linear.bias": "model-00008-of-00009.safetensors",
+    "model.layers.22.mlp.router.linear.weight": "model-00008-of-00009.safetensors",
     "model.layers.22.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
     "model.layers.22.self_attn.k_proj.bias": "model-00008-of-00009.safetensors",
     "model.layers.22.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
@@ -2391,8 +2391,8 @@
     "model.layers.23.mlp.experts.gate_up_projs.8.weight": "model-00008-of-00009.safetensors",
     "model.layers.23.mlp.experts.gate_up_projs.9.bias": "model-00008-of-00009.safetensors",
     "model.layers.23.mlp.experts.gate_up_projs.9.weight": "model-00008-of-00009.safetensors",
-    "model.layers.23.mlp.router.bias": "model-00008-of-00009.safetensors",
-    "model.layers.23.mlp.router.weight": "model-00008-of-00009.safetensors",
+    "model.layers.23.mlp.router.linear.bias": "model-00008-of-00009.safetensors",
+    "model.layers.23.mlp.router.linear.weight": "model-00008-of-00009.safetensors",
     "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00009.safetensors",
     "model.layers.23.self_attn.k_proj.bias": "model-00008-of-00009.safetensors",
     "model.layers.23.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
@@ -2532,8 +2532,8 @@
     "model.layers.3.mlp.experts.gate_up_projs.8.weight": "model-00002-of-00009.safetensors",
     "model.layers.3.mlp.experts.gate_up_projs.9.bias": "model-00002-of-00009.safetensors",
     "model.layers.3.mlp.experts.gate_up_projs.9.weight": "model-00002-of-00009.safetensors",
-    "model.layers.3.mlp.router.bias": "model-00002-of-00009.safetensors",
-    "model.layers.3.mlp.router.weight": "model-00002-of-00009.safetensors",
+    "model.layers.3.mlp.router.linear.bias": "model-00002-of-00009.safetensors",
+    "model.layers.3.mlp.router.linear.weight": "model-00002-of-00009.safetensors",
     "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
     "model.layers.3.self_attn.k_proj.bias": "model-00002-of-00009.safetensors",
     "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
@@ -2673,8 +2673,8 @@
     "model.layers.4.mlp.experts.gate_up_projs.8.weight": "model-00002-of-00009.safetensors",
     "model.layers.4.mlp.experts.gate_up_projs.9.bias": "model-00002-of-00009.safetensors",
     "model.layers.4.mlp.experts.gate_up_projs.9.weight": "model-00002-of-00009.safetensors",
-    "model.layers.4.mlp.router.bias": "model-00002-of-00009.safetensors",
-    "model.layers.4.mlp.router.weight": "model-00002-of-00009.safetensors",
+    "model.layers.4.mlp.router.linear.bias": "model-00002-of-00009.safetensors",
+    "model.layers.4.mlp.router.linear.weight": "model-00002-of-00009.safetensors",
     "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
     "model.layers.4.self_attn.k_proj.bias": "model-00002-of-00009.safetensors",
     "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
@@ -2814,8 +2814,8 @@
     "model.layers.5.mlp.experts.gate_up_projs.8.weight": "model-00002-of-00009.safetensors",
     "model.layers.5.mlp.experts.gate_up_projs.9.bias": "model-00002-of-00009.safetensors",
     "model.layers.5.mlp.experts.gate_up_projs.9.weight": "model-00002-of-00009.safetensors",
-    "model.layers.5.mlp.router.bias": "model-00002-of-00009.safetensors",
-    "model.layers.5.mlp.router.weight": "model-00002-of-00009.safetensors",
+    "model.layers.5.mlp.router.linear.bias": "model-00002-of-00009.safetensors",
+    "model.layers.5.mlp.router.linear.weight": "model-00002-of-00009.safetensors",
     "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
     "model.layers.5.self_attn.k_proj.bias": "model-00002-of-00009.safetensors",
     "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
@@ -2955,8 +2955,8 @@
     "model.layers.6.mlp.experts.gate_up_projs.8.weight": "model-00003-of-00009.safetensors",
     "model.layers.6.mlp.experts.gate_up_projs.9.bias": "model-00003-of-00009.safetensors",
     "model.layers.6.mlp.experts.gate_up_projs.9.weight": "model-00003-of-00009.safetensors",
-    "model.layers.6.mlp.router.bias": "model-00003-of-00009.safetensors",
-    "model.layers.6.mlp.router.weight": "model-00003-of-00009.safetensors",
+    "model.layers.6.mlp.router.linear.bias": "model-00003-of-00009.safetensors",
+    "model.layers.6.mlp.router.linear.weight": "model-00003-of-00009.safetensors",
     "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
     "model.layers.6.self_attn.k_proj.bias": "model-00003-of-00009.safetensors",
     "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
@@ -3096,8 +3096,8 @@
     "model.layers.7.mlp.experts.gate_up_projs.8.weight": "model-00003-of-00009.safetensors",
     "model.layers.7.mlp.experts.gate_up_projs.9.bias": "model-00003-of-00009.safetensors",
     "model.layers.7.mlp.experts.gate_up_projs.9.weight": "model-00003-of-00009.safetensors",
-    "model.layers.7.mlp.router.bias": "model-00003-of-00009.safetensors",
-    "model.layers.7.mlp.router.weight": "model-00003-of-00009.safetensors",
+    "model.layers.7.mlp.router.linear.bias": "model-00003-of-00009.safetensors",
+    "model.layers.7.mlp.router.linear.weight": "model-00003-of-00009.safetensors",
     "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
     "model.layers.7.self_attn.k_proj.bias": "model-00003-of-00009.safetensors",
     "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
@@ -3237,8 +3237,8 @@
     "model.layers.8.mlp.experts.gate_up_projs.8.weight": "model-00003-of-00009.safetensors",
     "model.layers.8.mlp.experts.gate_up_projs.9.bias": "model-00003-of-00009.safetensors",
     "model.layers.8.mlp.experts.gate_up_projs.9.weight": "model-00003-of-00009.safetensors",
-    "model.layers.8.mlp.router.bias": "model-00003-of-00009.safetensors",
-    "model.layers.8.mlp.router.weight": "model-00003-of-00009.safetensors",
+    "model.layers.8.mlp.router.linear.bias": "model-00003-of-00009.safetensors",
+    "model.layers.8.mlp.router.linear.weight": "model-00003-of-00009.safetensors",
     "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
     "model.layers.8.self_attn.k_proj.bias": "model-00003-of-00009.safetensors",
     "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
@@ -3378,8 +3378,8 @@
     "model.layers.9.mlp.experts.gate_up_projs.8.weight": "model-00004-of-00009.safetensors",
     "model.layers.9.mlp.experts.gate_up_projs.9.bias": "model-00004-of-00009.safetensors",
     "model.layers.9.mlp.experts.gate_up_projs.9.weight": "model-00004-of-00009.safetensors",
-    "model.layers.9.mlp.router.bias": "model-00004-of-00009.safetensors",
-    "model.layers.9.mlp.router.weight": "model-00004-of-00009.safetensors",
+    "model.layers.9.mlp.router.linear.bias": "model-00004-of-00009.safetensors",
+    "model.layers.9.mlp.router.linear.weight": "model-00004-of-00009.safetensors",
     "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
     "model.layers.9.self_attn.k_proj.bias": "model-00004-of-00009.safetensors",
     "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",