Narsil HF Staff commited on
Commit
16043c2
·
verified ·
1 Parent(s): 18a0504

Upload topology.json

Browse files
Files changed (1) hide show
  1. topology.json +875 -875
topology.json CHANGED
@@ -1,54 +1,32 @@
1
  {
2
  "tensors": {
3
- "h.2.ln_2.weight": {
4
  "type": "Distributed",
5
  "shape": [
 
6
  768
7
  ],
8
  "dtype": "F32",
9
  "chunks": [
10
  {
11
  "offsets": [
 
12
  0
13
  ],
14
  "shape": [
15
- 384
 
16
  ],
17
  "filename_index": 0
18
  },
19
  {
20
  "offsets": [
21
- 384
22
- ],
23
- "shape": [
24
- 384
25
- ],
26
- "filename_index": 1
27
- }
28
- ]
29
- },
30
- "h.5.mlp.c_proj.bias": {
31
- "type": "Distributed",
32
- "shape": [
33
- 768
34
- ],
35
- "dtype": "F32",
36
- "chunks": [
37
- {
38
- "offsets": [
39
  0
40
  ],
41
  "shape": [
42
- 384
43
- ],
44
- "filename_index": 0
45
- },
46
- {
47
- "offsets": [
48
- 384
49
- ],
50
- "shape": [
51
- 384
52
  ],
53
  "filename_index": 1
54
  }
@@ -86,52 +64,37 @@
86
  }
87
  ]
88
  },
89
- "h.5.attn.bias": {
90
  "type": "Distributed",
91
  "shape": [
92
- 1,
93
- 1,
94
- 1024,
95
- 1024
96
  ],
97
  "dtype": "F32",
98
  "chunks": [
99
  {
100
  "offsets": [
101
- 0,
102
- 0,
103
- 0,
104
  0
105
  ],
106
  "shape": [
107
- 1,
108
- 1,
109
- 1024,
110
- 512
111
  ],
112
  "filename_index": 0
113
  },
114
  {
115
  "offsets": [
116
- 0,
117
- 0,
118
- 0,
119
- 512
120
  ],
121
  "shape": [
122
- 1,
123
- 1,
124
- 1024,
125
- 512
126
  ],
127
  "filename_index": 1
128
  }
129
  ]
130
  },
131
- "h.9.mlp.c_fc.bias": {
132
  "type": "Distributed",
133
  "shape": [
134
- 3072
135
  ],
136
  "dtype": "F32",
137
  "chunks": [
@@ -140,26 +103,26 @@
140
  0
141
  ],
142
  "shape": [
143
- 1536
144
  ],
145
  "filename_index": 0
146
  },
147
  {
148
  "offsets": [
149
- 1536
150
  ],
151
  "shape": [
152
- 1536
153
  ],
154
  "filename_index": 1
155
  }
156
  ]
157
  },
158
- "h.11.mlp.c_fc.weight": {
159
  "type": "Distributed",
160
  "shape": [
161
  768,
162
- 3072
163
  ],
164
  "dtype": "F32",
165
  "chunks": [
@@ -170,24 +133,24 @@
170
  ],
171
  "shape": [
172
  768,
173
- 1536
174
  ],
175
  "filename_index": 0
176
  },
177
  {
178
  "offsets": [
179
  0,
180
- 1536
181
  ],
182
  "shape": [
183
  768,
184
- 1536
185
  ],
186
  "filename_index": 1
187
  }
188
  ]
189
  },
190
- "h.11.ln_2.weight": {
191
  "type": "Distributed",
192
  "shape": [
193
  768
@@ -214,7 +177,7 @@
214
  }
215
  ]
216
  },
217
- "h.4.ln_1.weight": {
218
  "type": "Distributed",
219
  "shape": [
220
  768
@@ -241,7 +204,7 @@
241
  }
242
  ]
243
  },
244
- "h.8.attn.c_proj.bias": {
245
  "type": "Distributed",
246
  "shape": [
247
  768
@@ -268,7 +231,7 @@
268
  }
269
  ]
270
  },
271
- "h.8.ln_1.weight": {
272
  "type": "Distributed",
273
  "shape": [
274
  768
@@ -295,11 +258,11 @@
295
  }
296
  ]
297
  },
298
- "h.10.attn.c_attn.weight": {
299
  "type": "Distributed",
300
  "shape": [
301
- 768,
302
- 2304
303
  ],
304
  "dtype": "F32",
305
  "chunks": [
@@ -309,52 +272,57 @@
309
  0
310
  ],
311
  "shape": [
312
- 768,
313
- 1152
314
  ],
315
  "filename_index": 0
316
  },
317
  {
318
  "offsets": [
319
- 0,
320
- 1152
321
  ],
322
  "shape": [
323
- 768,
324
- 1152
325
  ],
326
  "filename_index": 1
327
  }
328
  ]
329
  },
330
- "ln_f.bias": {
331
  "type": "Distributed",
332
  "shape": [
333
- 768
 
334
  ],
335
  "dtype": "F32",
336
  "chunks": [
337
  {
338
  "offsets": [
 
339
  0
340
  ],
341
  "shape": [
342
- 384
 
343
  ],
344
  "filename_index": 0
345
  },
346
  {
347
  "offsets": [
348
- 384
 
349
  ],
350
  "shape": [
351
- 384
 
352
  ],
353
  "filename_index": 1
354
  }
355
  ]
356
  },
357
- "h.3.ln_1.weight": {
358
  "type": "Distributed",
359
  "shape": [
360
  768
@@ -381,74 +349,64 @@
381
  }
382
  ]
383
  },
384
- "h.0.mlp.c_fc.weight": {
385
  "type": "Distributed",
386
  "shape": [
387
- 768,
388
- 3072
389
  ],
390
  "dtype": "F32",
391
  "chunks": [
392
  {
393
  "offsets": [
394
- 0,
395
  0
396
  ],
397
  "shape": [
398
- 768,
399
- 1536
400
  ],
401
  "filename_index": 0
402
  },
403
  {
404
  "offsets": [
405
- 0,
406
- 1536
407
  ],
408
  "shape": [
409
- 768,
410
- 1536
411
  ],
412
  "filename_index": 1
413
  }
414
  ]
415
  },
416
- "h.10.attn.c_proj.weight": {
417
  "type": "Distributed",
418
  "shape": [
419
- 768,
420
  768
421
  ],
422
  "dtype": "F32",
423
  "chunks": [
424
  {
425
  "offsets": [
426
- 0,
427
  0
428
  ],
429
  "shape": [
430
- 384,
431
- 768
432
  ],
433
  "filename_index": 0
434
  },
435
  {
436
  "offsets": [
437
- 384,
438
- 0
439
  ],
440
  "shape": [
441
- 384,
442
- 768
443
  ],
444
  "filename_index": 1
445
  }
446
  ]
447
  },
448
- "h.1.attn.c_proj.weight": {
449
  "type": "Distributed",
450
  "shape": [
451
- 768,
452
  768
453
  ],
454
  "dtype": "F32",
@@ -459,25 +417,25 @@
459
  0
460
  ],
461
  "shape": [
462
- 384,
463
  768
464
  ],
465
  "filename_index": 0
466
  },
467
  {
468
  "offsets": [
469
- 384,
470
  0
471
  ],
472
  "shape": [
473
- 384,
474
  768
475
  ],
476
  "filename_index": 1
477
  }
478
  ]
479
  },
480
- "h.3.ln_2.bias": {
481
  "type": "Distributed",
482
  "shape": [
483
  768
@@ -504,10 +462,10 @@
504
  }
505
  ]
506
  },
507
- "h.2.attn.c_proj.bias": {
508
  "type": "Distributed",
509
  "shape": [
510
- 768
511
  ],
512
  "dtype": "F32",
513
  "chunks": [
@@ -516,25 +474,25 @@
516
  0
517
  ],
518
  "shape": [
519
- 384
520
  ],
521
  "filename_index": 0
522
  },
523
  {
524
  "offsets": [
525
- 384
526
  ],
527
  "shape": [
528
- 384
529
  ],
530
  "filename_index": 1
531
  }
532
  ]
533
  },
534
- "h.6.ln_2.weight": {
535
  "type": "Distributed",
536
  "shape": [
537
- 768
538
  ],
539
  "dtype": "F32",
540
  "chunks": [
@@ -543,26 +501,26 @@
543
  0
544
  ],
545
  "shape": [
546
- 384
547
  ],
548
  "filename_index": 0
549
  },
550
  {
551
  "offsets": [
552
- 384
553
  ],
554
  "shape": [
555
- 384
556
  ],
557
  "filename_index": 1
558
  }
559
  ]
560
  },
561
- "h.3.attn.c_attn.weight": {
562
  "type": "Distributed",
563
  "shape": [
564
  768,
565
- 2304
566
  ],
567
  "dtype": "F32",
568
  "chunks": [
@@ -573,88 +531,78 @@
573
  ],
574
  "shape": [
575
  768,
576
- 1152
577
  ],
578
  "filename_index": 0
579
  },
580
  {
581
  "offsets": [
582
  0,
583
- 1152
584
  ],
585
  "shape": [
586
  768,
587
- 1152
588
  ],
589
  "filename_index": 1
590
  }
591
  ]
592
  },
593
- "h.8.mlp.c_proj.weight": {
594
  "type": "Distributed",
595
  "shape": [
596
- 3072,
597
- 768
598
  ],
599
  "dtype": "F32",
600
  "chunks": [
601
  {
602
  "offsets": [
603
- 0,
604
  0
605
  ],
606
  "shape": [
607
- 1536,
608
- 768
609
  ],
610
  "filename_index": 0
611
  },
612
  {
613
  "offsets": [
614
- 1536,
615
- 0
616
  ],
617
  "shape": [
618
- 1536,
619
- 768
620
  ],
621
  "filename_index": 1
622
  }
623
  ]
624
  },
625
- "h.11.attn.c_attn.weight": {
626
  "type": "Distributed",
627
  "shape": [
628
- 768,
629
- 2304
630
  ],
631
  "dtype": "F32",
632
  "chunks": [
633
  {
634
  "offsets": [
635
- 0,
636
  0
637
  ],
638
  "shape": [
639
- 768,
640
- 1152
641
  ],
642
  "filename_index": 0
643
  },
644
  {
645
  "offsets": [
646
- 0,
647
- 1152
648
  ],
649
  "shape": [
650
- 768,
651
- 1152
652
  ],
653
  "filename_index": 1
654
  }
655
  ]
656
  },
657
- "h.6.attn.c_proj.weight": {
658
  "type": "Distributed",
659
  "shape": [
660
  768,
@@ -686,49 +634,39 @@
686
  }
687
  ]
688
  },
689
- "h.10.attn.bias": {
690
  "type": "Distributed",
691
  "shape": [
692
- 1,
693
- 1,
694
- 1024,
695
- 1024
696
  ],
697
  "dtype": "F32",
698
  "chunks": [
699
  {
700
  "offsets": [
701
- 0,
702
- 0,
703
  0,
704
  0
705
  ],
706
  "shape": [
707
- 1,
708
- 1,
709
- 1024,
710
- 512
711
  ],
712
  "filename_index": 0
713
  },
714
  {
715
  "offsets": [
716
  0,
717
- 0,
718
- 0,
719
- 512
720
  ],
721
  "shape": [
722
- 1,
723
- 1,
724
- 1024,
725
- 512
726
  ],
727
  "filename_index": 1
728
  }
729
  ]
730
  },
731
- "h.1.mlp.c_fc.weight": {
732
  "type": "Distributed",
733
  "shape": [
734
  768,
@@ -760,10 +698,10 @@
760
  }
761
  ]
762
  },
763
- "h.3.mlp.c_fc.bias": {
764
  "type": "Distributed",
765
  "shape": [
766
- 3072
767
  ],
768
  "dtype": "F32",
769
  "chunks": [
@@ -772,26 +710,26 @@
772
  0
773
  ],
774
  "shape": [
775
- 1536
776
  ],
777
  "filename_index": 0
778
  },
779
  {
780
  "offsets": [
781
- 1536
782
  ],
783
  "shape": [
784
- 1536
785
  ],
786
  "filename_index": 1
787
  }
788
  ]
789
  },
790
- "h.8.attn.c_attn.weight": {
791
  "type": "Distributed",
792
  "shape": [
793
  768,
794
- 2304
795
  ],
796
  "dtype": "F32",
797
  "chunks": [
@@ -802,56 +740,51 @@
802
  ],
803
  "shape": [
804
  768,
805
- 1152
806
  ],
807
  "filename_index": 0
808
  },
809
  {
810
  "offsets": [
811
  0,
812
- 1152
813
  ],
814
  "shape": [
815
  768,
816
- 1152
817
  ],
818
  "filename_index": 1
819
  }
820
  ]
821
  },
822
- "h.10.mlp.c_fc.weight": {
823
  "type": "Distributed",
824
  "shape": [
825
- 768,
826
- 3072
827
  ],
828
  "dtype": "F32",
829
  "chunks": [
830
  {
831
  "offsets": [
832
- 0,
833
  0
834
  ],
835
  "shape": [
836
- 768,
837
- 1536
838
  ],
839
  "filename_index": 0
840
  },
841
  {
842
  "offsets": [
843
- 0,
844
- 1536
845
  ],
846
  "shape": [
847
- 768,
848
- 1536
849
  ],
850
  "filename_index": 1
851
  }
852
  ]
853
  },
854
- "h.1.mlp.c_fc.bias": {
855
  "type": "Distributed",
856
  "shape": [
857
  3072
@@ -878,10 +811,10 @@
878
  }
879
  ]
880
  },
881
- "h.3.attn.c_proj.weight": {
882
  "type": "Distributed",
883
  "shape": [
884
- 768,
885
  768
886
  ],
887
  "dtype": "F32",
@@ -892,25 +825,25 @@
892
  0
893
  ],
894
  "shape": [
895
- 384,
896
  768
897
  ],
898
  "filename_index": 0
899
  },
900
  {
901
  "offsets": [
902
- 384,
903
  0
904
  ],
905
  "shape": [
906
- 384,
907
  768
908
  ],
909
  "filename_index": 1
910
  }
911
  ]
912
  },
913
- "ln_f.weight": {
914
  "type": "Distributed",
915
  "shape": [
916
  768
@@ -937,61 +870,71 @@
937
  }
938
  ]
939
  },
940
- "h.7.attn.c_attn.bias": {
941
  "type": "Distributed",
942
  "shape": [
943
- 2304
 
944
  ],
945
  "dtype": "F32",
946
  "chunks": [
947
  {
948
  "offsets": [
 
949
  0
950
  ],
951
  "shape": [
952
- 1152
 
953
  ],
954
  "filename_index": 0
955
  },
956
  {
957
  "offsets": [
958
- 1152
 
959
  ],
960
  "shape": [
961
- 1152
 
962
  ],
963
  "filename_index": 1
964
  }
965
  ]
966
  },
967
- "h.11.attn.c_attn.bias": {
968
  "type": "Distributed",
969
  "shape": [
970
- 2304
 
971
  ],
972
  "dtype": "F32",
973
  "chunks": [
974
  {
975
  "offsets": [
 
976
  0
977
  ],
978
  "shape": [
979
- 1152
 
980
  ],
981
  "filename_index": 0
982
  },
983
  {
984
  "offsets": [
985
- 1152
 
986
  ],
987
  "shape": [
988
- 1152
 
989
  ],
990
  "filename_index": 1
991
  }
992
  ]
993
  },
994
- "h.11.ln_1.weight": {
995
  "type": "Distributed",
996
  "shape": [
997
  768
@@ -1018,10 +961,10 @@
1018
  }
1019
  ]
1020
  },
1021
- "h.5.attn.c_proj.weight": {
1022
  "type": "Distributed",
1023
  "shape": [
1024
- 768,
1025
  768
1026
  ],
1027
  "dtype": "F32",
@@ -1032,29 +975,29 @@
1032
  0
1033
  ],
1034
  "shape": [
1035
- 384,
1036
  768
1037
  ],
1038
  "filename_index": 0
1039
  },
1040
  {
1041
  "offsets": [
1042
- 384,
1043
  0
1044
  ],
1045
  "shape": [
1046
- 384,
1047
  768
1048
  ],
1049
  "filename_index": 1
1050
  }
1051
  ]
1052
  },
1053
- "h.9.mlp.c_proj.weight": {
1054
  "type": "Distributed",
1055
  "shape": [
1056
- 3072,
1057
- 768
1058
  ],
1059
  "dtype": "F32",
1060
  "chunks": [
@@ -1064,52 +1007,67 @@
1064
  0
1065
  ],
1066
  "shape": [
1067
- 1536,
1068
- 768
1069
  ],
1070
  "filename_index": 0
1071
  },
1072
  {
1073
  "offsets": [
1074
- 1536,
1075
- 0
1076
  ],
1077
  "shape": [
1078
- 1536,
1079
- 768
1080
  ],
1081
  "filename_index": 1
1082
  }
1083
  ]
1084
  },
1085
- "h.4.mlp.c_fc.bias": {
1086
  "type": "Distributed",
1087
  "shape": [
1088
- 3072
 
 
 
1089
  ],
1090
  "dtype": "F32",
1091
  "chunks": [
1092
  {
1093
  "offsets": [
 
 
 
1094
  0
1095
  ],
1096
  "shape": [
1097
- 1536
 
 
 
1098
  ],
1099
  "filename_index": 0
1100
  },
1101
  {
1102
  "offsets": [
1103
- 1536
 
 
 
1104
  ],
1105
  "shape": [
1106
- 1536
 
 
 
1107
  ],
1108
  "filename_index": 1
1109
  }
1110
  ]
1111
  },
1112
- "h.3.ln_1.bias": {
1113
  "type": "Distributed",
1114
  "shape": [
1115
  768
@@ -1136,7 +1094,7 @@
1136
  }
1137
  ]
1138
  },
1139
- "h.7.ln_1.weight": {
1140
  "type": "Distributed",
1141
  "shape": [
1142
  768
@@ -1163,91 +1121,66 @@
1163
  }
1164
  ]
1165
  },
1166
- "h.8.attn.bias": {
1167
  "type": "Distributed",
1168
  "shape": [
1169
- 1,
1170
- 1,
1171
- 1024,
1172
- 1024
1173
  ],
1174
  "dtype": "F32",
1175
  "chunks": [
1176
  {
1177
  "offsets": [
1178
- 0,
1179
- 0,
1180
- 0,
1181
  0
1182
  ],
1183
  "shape": [
1184
- 1,
1185
- 1,
1186
- 1024,
1187
- 512
1188
  ],
1189
  "filename_index": 0
1190
  },
1191
  {
1192
  "offsets": [
1193
- 0,
1194
- 0,
1195
- 0,
1196
- 512
1197
  ],
1198
  "shape": [
1199
- 1,
1200
- 1,
1201
- 1024,
1202
- 512
1203
  ],
1204
  "filename_index": 1
1205
  }
1206
  ]
1207
  },
1208
- "h.11.attn.bias": {
1209
  "type": "Distributed",
1210
  "shape": [
1211
- 1,
1212
- 1,
1213
- 1024,
1214
- 1024
1215
  ],
1216
  "dtype": "F32",
1217
  "chunks": [
1218
  {
1219
  "offsets": [
1220
- 0,
1221
- 0,
1222
  0,
1223
  0
1224
  ],
1225
  "shape": [
1226
- 1,
1227
- 1,
1228
- 1024,
1229
- 512
1230
  ],
1231
  "filename_index": 0
1232
  },
1233
  {
1234
  "offsets": [
1235
- 0,
1236
- 0,
1237
- 0,
1238
- 512
1239
  ],
1240
  "shape": [
1241
- 1,
1242
- 1,
1243
- 1024,
1244
- 512
1245
  ],
1246
  "filename_index": 1
1247
  }
1248
  ]
1249
  },
1250
- "h.5.ln_1.bias": {
1251
  "type": "Distributed",
1252
  "shape": [
1253
  768
@@ -1274,10 +1207,10 @@
1274
  }
1275
  ]
1276
  },
1277
- "h.7.mlp.c_proj.weight": {
1278
  "type": "Distributed",
1279
  "shape": [
1280
- 3072,
1281
  768
1282
  ],
1283
  "dtype": "F32",
@@ -1288,25 +1221,25 @@
1288
  0
1289
  ],
1290
  "shape": [
1291
- 1536,
1292
  768
1293
  ],
1294
  "filename_index": 0
1295
  },
1296
  {
1297
  "offsets": [
1298
- 1536,
1299
  0
1300
  ],
1301
  "shape": [
1302
- 1536,
1303
  768
1304
  ],
1305
  "filename_index": 1
1306
  }
1307
  ]
1308
  },
1309
- "h.10.mlp.c_proj.bias": {
1310
  "type": "Distributed",
1311
  "shape": [
1312
  768
@@ -1333,11 +1266,11 @@
1333
  }
1334
  ]
1335
  },
1336
- "h.3.mlp.c_proj.weight": {
1337
  "type": "Distributed",
1338
  "shape": [
1339
- 3072,
1340
- 768
1341
  ],
1342
  "dtype": "F32",
1343
  "chunks": [
@@ -1347,25 +1280,25 @@
1347
  0
1348
  ],
1349
  "shape": [
1350
- 1536,
1351
- 768
1352
  ],
1353
  "filename_index": 0
1354
  },
1355
  {
1356
  "offsets": [
1357
- 1536,
1358
- 0
1359
  ],
1360
  "shape": [
1361
- 1536,
1362
- 768
1363
  ],
1364
  "filename_index": 1
1365
  }
1366
  ]
1367
  },
1368
- "h.6.attn.c_attn.weight": {
1369
  "type": "Distributed",
1370
  "shape": [
1371
  768,
@@ -1397,101 +1330,96 @@
1397
  }
1398
  ]
1399
  },
1400
- "h.8.ln_2.weight": {
1401
  "type": "Distributed",
1402
  "shape": [
1403
- 768
 
1404
  ],
1405
  "dtype": "F32",
1406
  "chunks": [
1407
  {
1408
  "offsets": [
 
1409
  0
1410
  ],
1411
  "shape": [
1412
- 384
 
1413
  ],
1414
  "filename_index": 0
1415
  },
1416
  {
1417
  "offsets": [
1418
- 384
 
1419
  ],
1420
  "shape": [
1421
- 384
 
1422
  ],
1423
  "filename_index": 1
1424
  }
1425
  ]
1426
  },
1427
- "h.7.attn.c_attn.weight": {
1428
  "type": "Distributed",
1429
  "shape": [
1430
- 768,
1431
- 2304
1432
  ],
1433
  "dtype": "F32",
1434
  "chunks": [
1435
  {
1436
  "offsets": [
1437
- 0,
1438
  0
1439
  ],
1440
  "shape": [
1441
- 768,
1442
- 1152
1443
  ],
1444
  "filename_index": 0
1445
  },
1446
  {
1447
  "offsets": [
1448
- 0,
1449
- 1152
1450
  ],
1451
  "shape": [
1452
- 768,
1453
- 1152
1454
  ],
1455
  "filename_index": 1
1456
  }
1457
  ]
1458
  },
1459
- "h.8.attn.c_proj.weight": {
1460
  "type": "Distributed",
1461
  "shape": [
1462
- 768,
1463
  768
1464
  ],
1465
  "dtype": "F32",
1466
  "chunks": [
1467
  {
1468
  "offsets": [
1469
- 0,
1470
  0
1471
  ],
1472
  "shape": [
1473
- 384,
1474
- 768
1475
  ],
1476
  "filename_index": 0
1477
  },
1478
  {
1479
  "offsets": [
1480
- 384,
1481
- 0
1482
  ],
1483
  "shape": [
1484
- 384,
1485
- 768
1486
  ],
1487
  "filename_index": 1
1488
  }
1489
  ]
1490
  },
1491
- "h.10.attn.c_proj.bias": {
1492
  "type": "Distributed",
1493
  "shape": [
1494
- 768
1495
  ],
1496
  "dtype": "F32",
1497
  "chunks": [
@@ -1500,25 +1428,25 @@
1500
  0
1501
  ],
1502
  "shape": [
1503
- 384
1504
  ],
1505
  "filename_index": 0
1506
  },
1507
  {
1508
  "offsets": [
1509
- 384
1510
  ],
1511
  "shape": [
1512
- 384
1513
  ],
1514
  "filename_index": 1
1515
  }
1516
  ]
1517
  },
1518
- "h.7.ln_2.weight": {
1519
  "type": "Distributed",
1520
  "shape": [
1521
- 768
1522
  ],
1523
  "dtype": "F32",
1524
  "chunks": [
@@ -1527,49 +1455,54 @@
1527
  0
1528
  ],
1529
  "shape": [
1530
- 384
1531
  ],
1532
  "filename_index": 0
1533
  },
1534
  {
1535
  "offsets": [
1536
- 384
1537
  ],
1538
  "shape": [
1539
- 384
1540
  ],
1541
  "filename_index": 1
1542
  }
1543
  ]
1544
  },
1545
- "h.8.mlp.c_fc.bias": {
1546
  "type": "Distributed",
1547
  "shape": [
1548
- 3072
 
1549
  ],
1550
  "dtype": "F32",
1551
  "chunks": [
1552
  {
1553
  "offsets": [
 
1554
  0
1555
  ],
1556
  "shape": [
1557
- 1536
 
1558
  ],
1559
  "filename_index": 0
1560
  },
1561
  {
1562
  "offsets": [
1563
- 1536
 
1564
  ],
1565
  "shape": [
1566
- 1536
 
1567
  ],
1568
  "filename_index": 1
1569
  }
1570
  ]
1571
  },
1572
- "h.2.mlp.c_fc.weight": {
1573
  "type": "Distributed",
1574
  "shape": [
1575
  768,
@@ -1601,7 +1534,7 @@
1601
  }
1602
  ]
1603
  },
1604
- "h.4.ln_2.weight": {
1605
  "type": "Distributed",
1606
  "shape": [
1607
  768
@@ -1628,10 +1561,10 @@
1628
  }
1629
  ]
1630
  },
1631
- "h.9.attn.c_attn.bias": {
1632
  "type": "Distributed",
1633
  "shape": [
1634
- 2304
1635
  ],
1636
  "dtype": "F32",
1637
  "chunks": [
@@ -1640,48 +1573,70 @@
1640
  0
1641
  ],
1642
  "shape": [
1643
- 1152
1644
  ],
1645
  "filename_index": 0
1646
  },
1647
  {
1648
  "offsets": [
1649
- 1152
1650
  ],
1651
  "shape": [
1652
- 1152
1653
  ],
1654
  "filename_index": 1
1655
  }
1656
  ]
1657
  },
1658
- "h.4.attn.c_proj.weight": {
1659
  "type": "Distributed",
1660
  "shape": [
1661
- 768,
1662
  768
1663
  ],
1664
  "dtype": "F32",
1665
  "chunks": [
1666
  {
1667
  "offsets": [
1668
- 0,
1669
  0
1670
  ],
1671
  "shape": [
1672
- 384,
1673
- 768
1674
  ],
1675
  "filename_index": 0
1676
  },
1677
  {
1678
  "offsets": [
1679
- 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1680
  0
1681
  ],
1682
  "shape": [
1683
- 384,
1684
- 768
 
 
 
 
 
 
 
 
1685
  ],
1686
  "filename_index": 1
1687
  }
@@ -1714,7 +1669,7 @@
1714
  }
1715
  ]
1716
  },
1717
- "h.10.mlp.c_proj.weight": {
1718
  "type": "Distributed",
1719
  "shape": [
1720
  3072,
@@ -1746,39 +1701,61 @@
1746
  }
1747
  ]
1748
  },
1749
- "wte.weight": {
1750
  "type": "Distributed",
1751
  "shape": [
1752
- 50257,
1753
- 768
1754
  ],
1755
  "dtype": "F32",
1756
  "chunks": [
1757
  {
1758
  "offsets": [
1759
- 0,
1760
  0
1761
  ],
1762
  "shape": [
1763
- 50257,
1764
- 384
1765
  ],
1766
  "filename_index": 0
1767
  },
1768
  {
1769
  "offsets": [
1770
- 0,
1771
- 384
1772
  ],
1773
  "shape": [
1774
- 50257,
1775
- 384
1776
  ],
1777
  "filename_index": 1
1778
  }
1779
  ]
1780
  },
1781
- "h.10.ln_2.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1782
  "type": "Distributed",
1783
  "shape": [
1784
  768
@@ -1805,11 +1782,11 @@
1805
  }
1806
  ]
1807
  },
1808
- "h.9.attn.c_attn.weight": {
1809
  "type": "Distributed",
1810
  "shape": [
1811
  768,
1812
- 2304
1813
  ],
1814
  "dtype": "F32",
1815
  "chunks": [
@@ -1819,25 +1796,25 @@
1819
  0
1820
  ],
1821
  "shape": [
1822
- 768,
1823
- 1152
1824
  ],
1825
  "filename_index": 0
1826
  },
1827
  {
1828
  "offsets": [
1829
- 0,
1830
- 1152
1831
  ],
1832
  "shape": [
1833
- 768,
1834
- 1152
1835
  ],
1836
  "filename_index": 1
1837
  }
1838
  ]
1839
  },
1840
- "h.2.mlp.c_proj.bias": {
1841
  "type": "Distributed",
1842
  "shape": [
1843
  768
@@ -1864,34 +1841,39 @@
1864
  }
1865
  ]
1866
  },
1867
- "h.1.attn.c_proj.bias": {
1868
  "type": "Distributed",
1869
  "shape": [
 
1870
  768
1871
  ],
1872
  "dtype": "F32",
1873
  "chunks": [
1874
  {
1875
  "offsets": [
 
1876
  0
1877
  ],
1878
  "shape": [
1879
- 384
 
1880
  ],
1881
  "filename_index": 0
1882
  },
1883
  {
1884
  "offsets": [
1885
- 384
 
1886
  ],
1887
  "shape": [
1888
- 384
 
1889
  ],
1890
  "filename_index": 1
1891
  }
1892
  ]
1893
  },
1894
- "h.9.ln_2.weight": {
1895
  "type": "Distributed",
1896
  "shape": [
1897
  768
@@ -1918,113 +1900,88 @@
1918
  }
1919
  ]
1920
  },
1921
- "h.2.mlp.c_proj.weight": {
1922
  "type": "Distributed",
1923
  "shape": [
1924
- 3072,
1925
  768
1926
  ],
1927
  "dtype": "F32",
1928
  "chunks": [
1929
  {
1930
  "offsets": [
1931
- 0,
1932
  0
1933
  ],
1934
  "shape": [
1935
- 1536,
1936
- 768
1937
  ],
1938
  "filename_index": 0
1939
  },
1940
  {
1941
  "offsets": [
1942
- 1536,
1943
- 0
1944
  ],
1945
  "shape": [
1946
- 1536,
1947
- 768
1948
  ],
1949
  "filename_index": 1
1950
  }
1951
  ]
1952
  },
1953
- "h.0.attn.bias": {
1954
  "type": "Distributed",
1955
  "shape": [
1956
- 1,
1957
- 1,
1958
- 1024,
1959
- 1024
1960
  ],
1961
  "dtype": "F32",
1962
  "chunks": [
1963
  {
1964
  "offsets": [
1965
- 0,
1966
- 0,
1967
- 0,
1968
  0
1969
  ],
1970
  "shape": [
1971
- 1,
1972
- 1,
1973
- 1024,
1974
- 512
1975
  ],
1976
  "filename_index": 0
1977
  },
1978
  {
1979
  "offsets": [
1980
- 0,
1981
- 0,
1982
- 0,
1983
- 512
1984
  ],
1985
  "shape": [
1986
- 1,
1987
- 1,
1988
- 1024,
1989
- 512
1990
  ],
1991
  "filename_index": 1
1992
  }
1993
  ]
1994
  },
1995
- "h.5.attn.c_attn.weight": {
1996
  "type": "Distributed",
1997
  "shape": [
1998
- 768,
1999
- 2304
2000
  ],
2001
  "dtype": "F32",
2002
  "chunks": [
2003
  {
2004
  "offsets": [
2005
- 0,
2006
  0
2007
  ],
2008
  "shape": [
2009
- 768,
2010
- 1152
2011
  ],
2012
  "filename_index": 0
2013
  },
2014
  {
2015
  "offsets": [
2016
- 0,
2017
- 1152
2018
  ],
2019
  "shape": [
2020
- 768,
2021
- 1152
2022
  ],
2023
  "filename_index": 1
2024
  }
2025
  ]
2026
  },
2027
- "h.3.attn.c_proj.bias": {
2028
  "type": "Distributed",
2029
  "shape": [
2030
  768
@@ -2051,113 +2008,88 @@
2051
  }
2052
  ]
2053
  },
2054
- "h.7.attn.bias": {
2055
  "type": "Distributed",
2056
  "shape": [
2057
- 1,
2058
- 1,
2059
- 1024,
2060
- 1024
2061
  ],
2062
  "dtype": "F32",
2063
  "chunks": [
2064
  {
2065
  "offsets": [
2066
- 0,
2067
- 0,
2068
- 0,
2069
  0
2070
  ],
2071
  "shape": [
2072
- 1,
2073
- 1,
2074
- 1024,
2075
- 512
2076
  ],
2077
  "filename_index": 0
2078
  },
2079
  {
2080
  "offsets": [
2081
- 0,
2082
- 0,
2083
- 0,
2084
- 512
2085
  ],
2086
  "shape": [
2087
- 1,
2088
- 1,
2089
- 1024,
2090
- 512
2091
  ],
2092
  "filename_index": 1
2093
  }
2094
  ]
2095
  },
2096
- "h.3.mlp.c_fc.weight": {
2097
  "type": "Distributed",
2098
  "shape": [
2099
- 768,
2100
- 3072
2101
  ],
2102
  "dtype": "F32",
2103
  "chunks": [
2104
  {
2105
  "offsets": [
2106
- 0,
2107
  0
2108
  ],
2109
  "shape": [
2110
- 768,
2111
- 1536
2112
  ],
2113
  "filename_index": 0
2114
  },
2115
  {
2116
  "offsets": [
2117
- 0,
2118
- 1536
2119
  ],
2120
  "shape": [
2121
- 768,
2122
- 1536
2123
  ],
2124
  "filename_index": 1
2125
  }
2126
  ]
2127
  },
2128
- "h.8.mlp.c_fc.weight": {
2129
  "type": "Distributed",
2130
  "shape": [
2131
- 768,
2132
- 3072
2133
  ],
2134
  "dtype": "F32",
2135
  "chunks": [
2136
  {
2137
  "offsets": [
2138
- 0,
2139
  0
2140
  ],
2141
  "shape": [
2142
- 768,
2143
- 1536
2144
  ],
2145
  "filename_index": 0
2146
  },
2147
  {
2148
  "offsets": [
2149
- 0,
2150
- 1536
2151
  ],
2152
  "shape": [
2153
- 768,
2154
- 1536
2155
  ],
2156
  "filename_index": 1
2157
  }
2158
  ]
2159
  },
2160
- "h.0.attn.c_proj.bias": {
2161
  "type": "Distributed",
2162
  "shape": [
2163
  768
@@ -2184,7 +2116,7 @@
2184
  }
2185
  ]
2186
  },
2187
- "h.8.mlp.c_proj.bias": {
2188
  "type": "Distributed",
2189
  "shape": [
2190
  768
@@ -2211,7 +2143,7 @@
2211
  }
2212
  ]
2213
  },
2214
- "h.0.ln_1.weight": {
2215
  "type": "Distributed",
2216
  "shape": [
2217
  768
@@ -2238,76 +2170,66 @@
2238
  }
2239
  ]
2240
  },
2241
- "h.3.attn.bias": {
2242
  "type": "Distributed",
2243
  "shape": [
2244
- 1,
2245
- 1,
2246
- 1024,
2247
- 1024
2248
  ],
2249
  "dtype": "F32",
2250
  "chunks": [
2251
  {
2252
  "offsets": [
2253
- 0,
2254
- 0,
2255
- 0,
2256
  0
2257
  ],
2258
  "shape": [
2259
- 1,
2260
- 1,
2261
- 1024,
2262
- 512
2263
  ],
2264
  "filename_index": 0
2265
  },
2266
  {
2267
  "offsets": [
2268
- 0,
2269
- 0,
2270
- 0,
2271
- 512
2272
  ],
2273
  "shape": [
2274
- 1,
2275
- 1,
2276
- 1024,
2277
- 512
2278
  ],
2279
  "filename_index": 1
2280
  }
2281
  ]
2282
  },
2283
- "h.6.ln_2.bias": {
2284
  "type": "Distributed",
2285
  "shape": [
 
2286
  768
2287
  ],
2288
  "dtype": "F32",
2289
  "chunks": [
2290
  {
2291
  "offsets": [
 
2292
  0
2293
  ],
2294
  "shape": [
2295
- 384
 
2296
  ],
2297
  "filename_index": 0
2298
  },
2299
  {
2300
  "offsets": [
2301
- 384
 
2302
  ],
2303
  "shape": [
2304
- 384
 
2305
  ],
2306
  "filename_index": 1
2307
  }
2308
  ]
2309
  },
2310
- "h.3.ln_2.weight": {
2311
  "type": "Distributed",
2312
  "shape": [
2313
  768
@@ -2334,7 +2256,7 @@
2334
  }
2335
  ]
2336
  },
2337
- "h.4.ln_1.bias": {
2338
  "type": "Distributed",
2339
  "shape": [
2340
  768
@@ -2361,7 +2283,7 @@
2361
  }
2362
  ]
2363
  },
2364
- "h.8.ln_2.bias": {
2365
  "type": "Distributed",
2366
  "shape": [
2367
  768
@@ -2388,7 +2310,7 @@
2388
  }
2389
  ]
2390
  },
2391
- "h.1.ln_2.bias": {
2392
  "type": "Distributed",
2393
  "shape": [
2394
  768
@@ -2415,34 +2337,81 @@
2415
  }
2416
  ]
2417
  },
2418
- "h.1.mlp.c_proj.bias": {
2419
  "type": "Distributed",
2420
  "shape": [
2421
- 768
 
2422
  ],
2423
  "dtype": "F32",
2424
  "chunks": [
2425
  {
2426
  "offsets": [
 
2427
  0
2428
  ],
2429
  "shape": [
2430
- 384
 
2431
  ],
2432
  "filename_index": 0
2433
  },
2434
  {
2435
  "offsets": [
2436
- 384
 
2437
  ],
2438
  "shape": [
2439
- 384
 
2440
  ],
2441
  "filename_index": 1
2442
  }
2443
  ]
2444
  },
2445
- "h.6.mlp.c_proj.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2446
  "type": "Distributed",
2447
  "shape": [
2448
  3072,
@@ -2474,11 +2443,11 @@
2474
  }
2475
  ]
2476
  },
2477
- "h.7.mlp.c_fc.weight": {
2478
  "type": "Distributed",
2479
  "shape": [
2480
  768,
2481
- 3072
2482
  ],
2483
  "dtype": "F32",
2484
  "chunks": [
@@ -2489,56 +2458,51 @@
2489
  ],
2490
  "shape": [
2491
  768,
2492
- 1536
2493
  ],
2494
  "filename_index": 0
2495
  },
2496
  {
2497
  "offsets": [
2498
  0,
2499
- 1536
2500
  ],
2501
  "shape": [
2502
  768,
2503
- 1536
2504
  ],
2505
  "filename_index": 1
2506
  }
2507
  ]
2508
  },
2509
- "h.0.attn.c_proj.weight": {
2510
  "type": "Distributed",
2511
  "shape": [
2512
- 768,
2513
  768
2514
  ],
2515
  "dtype": "F32",
2516
  "chunks": [
2517
  {
2518
  "offsets": [
2519
- 0,
2520
  0
2521
  ],
2522
  "shape": [
2523
- 384,
2524
- 768
2525
  ],
2526
  "filename_index": 0
2527
  },
2528
  {
2529
  "offsets": [
2530
- 384,
2531
- 0
2532
  ],
2533
  "shape": [
2534
- 384,
2535
- 768
2536
  ],
2537
  "filename_index": 1
2538
  }
2539
  ]
2540
  },
2541
- "h.6.ln_1.weight": {
2542
  "type": "Distributed",
2543
  "shape": [
2544
  768
@@ -2565,10 +2529,10 @@
2565
  }
2566
  ]
2567
  },
2568
- "h.5.attn.c_attn.bias": {
2569
  "type": "Distributed",
2570
  "shape": [
2571
- 2304
2572
  ],
2573
  "dtype": "F32",
2574
  "chunks": [
@@ -2577,25 +2541,25 @@
2577
  0
2578
  ],
2579
  "shape": [
2580
- 1152
2581
  ],
2582
  "filename_index": 0
2583
  },
2584
  {
2585
  "offsets": [
2586
- 1152
2587
  ],
2588
  "shape": [
2589
- 1152
2590
  ],
2591
  "filename_index": 1
2592
  }
2593
  ]
2594
  },
2595
- "h.8.ln_1.bias": {
2596
  "type": "Distributed",
2597
  "shape": [
2598
- 768
2599
  ],
2600
  "dtype": "F32",
2601
  "chunks": [
@@ -2604,81 +2568,91 @@
2604
  0
2605
  ],
2606
  "shape": [
2607
- 384
2608
  ],
2609
  "filename_index": 0
2610
  },
2611
  {
2612
  "offsets": [
2613
- 384
2614
  ],
2615
  "shape": [
2616
- 384
2617
  ],
2618
  "filename_index": 1
2619
  }
2620
  ]
2621
  },
2622
- "h.4.mlp.c_proj.weight": {
2623
  "type": "Distributed",
2624
  "shape": [
2625
- 3072,
2626
  768
2627
  ],
2628
  "dtype": "F32",
2629
  "chunks": [
2630
  {
2631
  "offsets": [
2632
- 0,
2633
  0
2634
  ],
2635
  "shape": [
2636
- 1536,
2637
- 768
2638
  ],
2639
  "filename_index": 0
2640
  },
2641
  {
2642
  "offsets": [
2643
- 1536,
2644
- 0
2645
  ],
2646
  "shape": [
2647
- 1536,
2648
- 768
2649
  ],
2650
  "filename_index": 1
2651
  }
2652
  ]
2653
  },
2654
- "h.10.ln_1.bias": {
2655
  "type": "Distributed",
2656
  "shape": [
2657
- 768
 
 
 
2658
  ],
2659
  "dtype": "F32",
2660
  "chunks": [
2661
  {
2662
  "offsets": [
 
 
 
2663
  0
2664
  ],
2665
  "shape": [
2666
- 384
 
 
 
2667
  ],
2668
  "filename_index": 0
2669
  },
2670
  {
2671
  "offsets": [
2672
- 384
 
 
 
2673
  ],
2674
  "shape": [
2675
- 384
 
 
 
2676
  ],
2677
  "filename_index": 1
2678
  }
2679
  ]
2680
  },
2681
- "h.1.ln_2.weight": {
2682
  "type": "Distributed",
2683
  "shape": [
2684
  768
@@ -2705,7 +2679,7 @@
2705
  }
2706
  ]
2707
  },
2708
- "h.10.ln_1.weight": {
2709
  "type": "Distributed",
2710
  "shape": [
2711
  768
@@ -2732,147 +2706,182 @@
2732
  }
2733
  ]
2734
  },
2735
- "h.11.mlp.c_fc.bias": {
2736
  "type": "Distributed",
2737
  "shape": [
 
2738
  3072
2739
  ],
2740
  "dtype": "F32",
2741
  "chunks": [
2742
  {
2743
  "offsets": [
 
2744
  0
2745
  ],
2746
  "shape": [
 
2747
  1536
2748
  ],
2749
  "filename_index": 0
2750
  },
2751
  {
2752
  "offsets": [
 
2753
  1536
2754
  ],
2755
  "shape": [
 
2756
  1536
2757
  ],
2758
  "filename_index": 1
2759
  }
2760
  ]
2761
  },
2762
- "h.10.mlp.c_fc.bias": {
2763
  "type": "Distributed",
2764
  "shape": [
2765
- 3072
 
 
 
2766
  ],
2767
  "dtype": "F32",
2768
  "chunks": [
2769
  {
2770
  "offsets": [
 
 
 
2771
  0
2772
  ],
2773
  "shape": [
2774
- 1536
 
 
 
2775
  ],
2776
  "filename_index": 0
2777
  },
2778
  {
2779
  "offsets": [
2780
- 1536
 
 
 
2781
  ],
2782
  "shape": [
2783
- 1536
 
 
 
2784
  ],
2785
  "filename_index": 1
2786
  }
2787
  ]
2788
  },
2789
- "h.5.mlp.c_proj.weight": {
2790
  "type": "Distributed",
2791
  "shape": [
2792
- 3072,
2793
  768
2794
  ],
2795
  "dtype": "F32",
2796
  "chunks": [
2797
  {
2798
  "offsets": [
2799
- 0,
2800
  0
2801
  ],
2802
  "shape": [
2803
- 1536,
2804
- 768
2805
  ],
2806
  "filename_index": 0
2807
  },
2808
  {
2809
  "offsets": [
2810
- 1536,
2811
- 0
2812
  ],
2813
  "shape": [
2814
- 1536,
2815
- 768
2816
  ],
2817
  "filename_index": 1
2818
  }
2819
  ]
2820
  },
2821
- "h.10.attn.c_attn.bias": {
2822
  "type": "Distributed",
2823
  "shape": [
2824
- 2304
 
 
 
2825
  ],
2826
  "dtype": "F32",
2827
  "chunks": [
2828
  {
2829
  "offsets": [
 
 
 
2830
  0
2831
  ],
2832
  "shape": [
2833
- 1152
 
 
 
2834
  ],
2835
  "filename_index": 0
2836
  },
2837
  {
2838
  "offsets": [
2839
- 1152
 
 
 
2840
  ],
2841
  "shape": [
2842
- 1152
 
 
 
2843
  ],
2844
  "filename_index": 1
2845
  }
2846
  ]
2847
  },
2848
- "h.11.ln_2.bias": {
2849
  "type": "Distributed",
2850
  "shape": [
2851
- 768
 
2852
  ],
2853
  "dtype": "F32",
2854
  "chunks": [
2855
  {
2856
  "offsets": [
 
2857
  0
2858
  ],
2859
  "shape": [
2860
- 384
 
2861
  ],
2862
  "filename_index": 0
2863
  },
2864
  {
2865
  "offsets": [
2866
- 384
 
2867
  ],
2868
  "shape": [
2869
- 384
 
2870
  ],
2871
  "filename_index": 1
2872
  }
2873
  ]
2874
  },
2875
- "h.11.mlp.c_proj.bias": {
2876
  "type": "Distributed",
2877
  "shape": [
2878
  768
@@ -2899,10 +2908,10 @@
2899
  }
2900
  ]
2901
  },
2902
- "h.5.ln_2.bias": {
2903
  "type": "Distributed",
2904
  "shape": [
2905
- 768
2906
  ],
2907
  "dtype": "F32",
2908
  "chunks": [
@@ -2911,25 +2920,25 @@
2911
  0
2912
  ],
2913
  "shape": [
2914
- 384
2915
  ],
2916
  "filename_index": 0
2917
  },
2918
  {
2919
  "offsets": [
2920
- 384
2921
  ],
2922
  "shape": [
2923
- 384
2924
  ],
2925
  "filename_index": 1
2926
  }
2927
  ]
2928
  },
2929
- "h.4.attn.c_attn.bias": {
2930
  "type": "Distributed",
2931
  "shape": [
2932
- 2304
2933
  ],
2934
  "dtype": "F32",
2935
  "chunks": [
@@ -2938,76 +2947,96 @@
2938
  0
2939
  ],
2940
  "shape": [
2941
- 1152
2942
  ],
2943
  "filename_index": 0
2944
  },
2945
  {
2946
  "offsets": [
2947
- 1152
2948
  ],
2949
  "shape": [
2950
- 1152
2951
  ],
2952
  "filename_index": 1
2953
  }
2954
  ]
2955
  },
2956
- "h.4.ln_2.bias": {
2957
  "type": "Distributed",
2958
  "shape": [
2959
- 768
 
 
 
2960
  ],
2961
  "dtype": "F32",
2962
  "chunks": [
2963
  {
2964
  "offsets": [
 
 
 
2965
  0
2966
  ],
2967
  "shape": [
2968
- 384
 
 
 
2969
  ],
2970
  "filename_index": 0
2971
  },
2972
  {
2973
  "offsets": [
2974
- 384
 
 
 
2975
  ],
2976
  "shape": [
2977
- 384
 
 
 
2978
  ],
2979
  "filename_index": 1
2980
  }
2981
  ]
2982
  },
2983
- "h.2.attn.c_attn.bias": {
2984
  "type": "Distributed",
2985
  "shape": [
 
2986
  2304
2987
  ],
2988
  "dtype": "F32",
2989
  "chunks": [
2990
  {
2991
  "offsets": [
 
2992
  0
2993
  ],
2994
  "shape": [
 
2995
  1152
2996
  ],
2997
  "filename_index": 0
2998
  },
2999
  {
3000
  "offsets": [
 
3001
  1152
3002
  ],
3003
  "shape": [
 
3004
  1152
3005
  ],
3006
  "filename_index": 1
3007
  }
3008
  ]
3009
  },
3010
- "h.2.ln_1.weight": {
3011
  "type": "Distributed",
3012
  "shape": [
3013
  768
@@ -3034,34 +3063,49 @@
3034
  }
3035
  ]
3036
  },
3037
- "h.7.ln_1.bias": {
3038
  "type": "Distributed",
3039
  "shape": [
3040
- 768
 
 
 
3041
  ],
3042
  "dtype": "F32",
3043
  "chunks": [
3044
  {
3045
  "offsets": [
 
 
 
3046
  0
3047
  ],
3048
  "shape": [
3049
- 384
 
 
 
3050
  ],
3051
  "filename_index": 0
3052
  },
3053
  {
3054
  "offsets": [
3055
- 384
 
 
 
3056
  ],
3057
  "shape": [
3058
- 384
 
 
 
3059
  ],
3060
  "filename_index": 1
3061
  }
3062
  ]
3063
  },
3064
- "h.9.attn.c_proj.weight": {
3065
  "type": "Distributed",
3066
  "shape": [
3067
  768,
@@ -3093,7 +3137,7 @@
3093
  }
3094
  ]
3095
  },
3096
- "h.0.mlp.c_proj.bias": {
3097
  "type": "Distributed",
3098
  "shape": [
3099
  768
@@ -3120,7 +3164,7 @@
3120
  }
3121
  ]
3122
  },
3123
- "h.7.attn.c_proj.bias": {
3124
  "type": "Distributed",
3125
  "shape": [
3126
  768
@@ -3147,7 +3191,7 @@
3147
  }
3148
  ]
3149
  },
3150
- "h.9.ln_1.bias": {
3151
  "type": "Distributed",
3152
  "shape": [
3153
  768
@@ -3174,74 +3218,64 @@
3174
  }
3175
  ]
3176
  },
3177
- "h.1.mlp.c_proj.weight": {
3178
  "type": "Distributed",
3179
  "shape": [
3180
- 3072,
3181
  768
3182
  ],
3183
  "dtype": "F32",
3184
  "chunks": [
3185
  {
3186
  "offsets": [
3187
- 0,
3188
  0
3189
  ],
3190
  "shape": [
3191
- 1536,
3192
- 768
3193
  ],
3194
  "filename_index": 0
3195
  },
3196
  {
3197
  "offsets": [
3198
- 1536,
3199
- 0
3200
  ],
3201
  "shape": [
3202
- 1536,
3203
- 768
3204
  ],
3205
  "filename_index": 1
3206
  }
3207
  ]
3208
  },
3209
- "h.11.mlp.c_proj.weight": {
3210
  "type": "Distributed",
3211
  "shape": [
3212
- 3072,
3213
  768
3214
  ],
3215
  "dtype": "F32",
3216
  "chunks": [
3217
  {
3218
  "offsets": [
3219
- 0,
3220
  0
3221
  ],
3222
  "shape": [
3223
- 1536,
3224
- 768
3225
  ],
3226
  "filename_index": 0
3227
  },
3228
  {
3229
  "offsets": [
3230
- 1536,
3231
- 0
3232
  ],
3233
  "shape": [
3234
- 1536,
3235
- 768
3236
  ],
3237
  "filename_index": 1
3238
  }
3239
  ]
3240
  },
3241
- "h.0.ln_1.bias": {
3242
  "type": "Distributed",
3243
  "shape": [
3244
- 768
3245
  ],
3246
  "dtype": "F32",
3247
  "chunks": [
@@ -3250,22 +3284,22 @@
3250
  0
3251
  ],
3252
  "shape": [
3253
- 384
3254
  ],
3255
  "filename_index": 0
3256
  },
3257
  {
3258
  "offsets": [
3259
- 384
3260
  ],
3261
  "shape": [
3262
- 384
3263
  ],
3264
  "filename_index": 1
3265
  }
3266
  ]
3267
  },
3268
- "h.7.mlp.c_proj.bias": {
3269
  "type": "Distributed",
3270
  "shape": [
3271
  768
@@ -3292,10 +3326,10 @@
3292
  }
3293
  ]
3294
  },
3295
- "h.5.ln_1.weight": {
3296
  "type": "Distributed",
3297
  "shape": [
3298
- 768
3299
  ],
3300
  "dtype": "F32",
3301
  "chunks": [
@@ -3304,58 +3338,53 @@
3304
  0
3305
  ],
3306
  "shape": [
3307
- 384
3308
  ],
3309
  "filename_index": 0
3310
  },
3311
  {
3312
  "offsets": [
3313
- 384
3314
  ],
3315
  "shape": [
3316
- 384
3317
  ],
3318
  "filename_index": 1
3319
  }
3320
  ]
3321
  },
3322
- "wpe.weight": {
3323
  "type": "Distributed",
3324
  "shape": [
3325
- 1024,
3326
  768
3327
  ],
3328
  "dtype": "F32",
3329
  "chunks": [
3330
  {
3331
  "offsets": [
3332
- 0,
3333
  0
3334
  ],
3335
  "shape": [
3336
- 1024,
3337
  384
3338
  ],
3339
  "filename_index": 0
3340
  },
3341
  {
3342
  "offsets": [
3343
- 0,
3344
  384
3345
  ],
3346
  "shape": [
3347
- 1024,
3348
  384
3349
  ],
3350
  "filename_index": 1
3351
  }
3352
  ]
3353
  },
3354
- "h.5.mlp.c_fc.weight": {
3355
  "type": "Distributed",
3356
  "shape": [
3357
  768,
3358
- 3072
3359
  ],
3360
  "dtype": "F32",
3361
  "chunks": [
@@ -3366,24 +3395,24 @@
3366
  ],
3367
  "shape": [
3368
  768,
3369
- 1536
3370
  ],
3371
  "filename_index": 0
3372
  },
3373
  {
3374
  "offsets": [
3375
  0,
3376
- 1536
3377
  ],
3378
  "shape": [
3379
  768,
3380
- 1536
3381
  ],
3382
  "filename_index": 1
3383
  }
3384
  ]
3385
  },
3386
- "h.2.attn.bias": {
3387
  "type": "Distributed",
3388
  "shape": [
3389
  1,
@@ -3425,7 +3454,7 @@
3425
  }
3426
  ]
3427
  },
3428
- "h.9.ln_2.bias": {
3429
  "type": "Distributed",
3430
  "shape": [
3431
  768
@@ -3452,10 +3481,10 @@
3452
  }
3453
  ]
3454
  },
3455
- "h.1.attn.c_attn.bias": {
3456
  "type": "Distributed",
3457
  "shape": [
3458
- 2304
3459
  ],
3460
  "dtype": "F32",
3461
  "chunks": [
@@ -3464,154 +3493,169 @@
3464
  0
3465
  ],
3466
  "shape": [
3467
- 1152
3468
  ],
3469
  "filename_index": 0
3470
  },
3471
  {
3472
  "offsets": [
3473
- 1152
3474
  ],
3475
  "shape": [
3476
- 1152
3477
  ],
3478
  "filename_index": 1
3479
  }
3480
  ]
3481
  },
3482
- "h.9.attn.bias": {
3483
  "type": "Distributed",
3484
  "shape": [
3485
- 1,
3486
- 1,
3487
- 1024,
3488
- 1024
3489
  ],
3490
  "dtype": "F32",
3491
  "chunks": [
3492
  {
3493
  "offsets": [
3494
- 0,
3495
- 0,
3496
  0,
3497
  0
3498
  ],
3499
  "shape": [
3500
- 1,
3501
- 1,
3502
- 1024,
3503
- 512
3504
  ],
3505
  "filename_index": 0
3506
  },
3507
  {
3508
  "offsets": [
3509
- 0,
3510
- 0,
3511
- 0,
3512
- 512
3513
  ],
3514
  "shape": [
3515
- 1,
3516
- 1,
3517
- 1024,
3518
- 512
3519
  ],
3520
  "filename_index": 1
3521
  }
3522
  ]
3523
  },
3524
- "h.0.attn.c_attn.weight": {
3525
  "type": "Distributed",
3526
  "shape": [
3527
- 768,
3528
- 2304
3529
  ],
3530
  "dtype": "F32",
3531
  "chunks": [
3532
  {
3533
  "offsets": [
3534
- 0,
3535
  0
3536
  ],
3537
  "shape": [
3538
- 768,
3539
- 1152
3540
  ],
3541
  "filename_index": 0
3542
  },
3543
  {
3544
  "offsets": [
3545
- 0,
3546
- 1152
3547
  ],
3548
  "shape": [
3549
- 768,
3550
- 1152
3551
  ],
3552
  "filename_index": 1
3553
  }
3554
  ]
3555
  },
3556
- "h.0.ln_2.weight": {
3557
  "type": "Distributed",
3558
  "shape": [
3559
- 768
 
 
 
3560
  ],
3561
  "dtype": "F32",
3562
  "chunks": [
3563
  {
3564
  "offsets": [
 
 
 
3565
  0
3566
  ],
3567
  "shape": [
3568
- 384
 
 
 
3569
  ],
3570
  "filename_index": 0
3571
  },
3572
  {
3573
  "offsets": [
3574
- 384
 
 
 
3575
  ],
3576
  "shape": [
3577
- 384
 
 
 
3578
  ],
3579
  "filename_index": 1
3580
  }
3581
  ]
3582
  },
3583
- "h.5.ln_2.weight": {
3584
  "type": "Distributed",
3585
  "shape": [
3586
- 768
 
 
 
3587
  ],
3588
  "dtype": "F32",
3589
  "chunks": [
3590
  {
3591
  "offsets": [
 
 
 
3592
  0
3593
  ],
3594
  "shape": [
3595
- 384
 
 
 
3596
  ],
3597
  "filename_index": 0
3598
  },
3599
  {
3600
  "offsets": [
3601
- 384
 
 
 
3602
  ],
3603
  "shape": [
3604
- 384
 
 
 
3605
  ],
3606
  "filename_index": 1
3607
  }
3608
  ]
3609
  },
3610
- "h.6.mlp.c_fc.weight": {
3611
  "type": "Distributed",
3612
  "shape": [
3613
- 768,
3614
- 3072
3615
  ],
3616
  "dtype": "F32",
3617
  "chunks": [
@@ -3621,25 +3665,25 @@
3621
  0
3622
  ],
3623
  "shape": [
3624
- 768,
3625
- 1536
3626
  ],
3627
  "filename_index": 0
3628
  },
3629
  {
3630
  "offsets": [
3631
- 0,
3632
- 1536
3633
  ],
3634
  "shape": [
3635
- 768,
3636
- 1536
3637
  ],
3638
  "filename_index": 1
3639
  }
3640
  ]
3641
  },
3642
- "h.3.mlp.c_proj.bias": {
3643
  "type": "Distributed",
3644
  "shape": [
3645
  768
@@ -3666,10 +3710,10 @@
3666
  }
3667
  ]
3668
  },
3669
- "h.10.ln_2.weight": {
3670
  "type": "Distributed",
3671
  "shape": [
3672
- 768
3673
  ],
3674
  "dtype": "F32",
3675
  "chunks": [
@@ -3678,150 +3722,140 @@
3678
  0
3679
  ],
3680
  "shape": [
3681
- 384
3682
  ],
3683
  "filename_index": 0
3684
  },
3685
  {
3686
  "offsets": [
3687
- 384
3688
  ],
3689
  "shape": [
3690
- 384
3691
  ],
3692
  "filename_index": 1
3693
  }
3694
  ]
3695
  },
3696
- "h.1.attn.bias": {
3697
  "type": "Distributed",
3698
  "shape": [
3699
- 1,
3700
- 1,
3701
- 1024,
3702
- 1024
3703
  ],
3704
  "dtype": "F32",
3705
  "chunks": [
3706
  {
3707
  "offsets": [
3708
- 0,
3709
- 0,
3710
- 0,
3711
  0
3712
  ],
3713
  "shape": [
3714
- 1,
3715
- 1,
3716
- 1024,
3717
- 512
3718
  ],
3719
  "filename_index": 0
3720
  },
3721
  {
3722
  "offsets": [
3723
- 0,
3724
- 0,
3725
- 0,
3726
- 512
3727
  ],
3728
  "shape": [
3729
- 1,
3730
- 1,
3731
- 1024,
3732
- 512
3733
  ],
3734
  "filename_index": 1
3735
  }
3736
  ]
3737
  },
3738
- "h.1.ln_1.bias": {
3739
  "type": "Distributed",
3740
  "shape": [
 
3741
  768
3742
  ],
3743
  "dtype": "F32",
3744
  "chunks": [
3745
  {
3746
  "offsets": [
 
3747
  0
3748
  ],
3749
  "shape": [
3750
- 384
 
3751
  ],
3752
  "filename_index": 0
3753
  },
3754
  {
3755
  "offsets": [
3756
- 384
 
3757
  ],
3758
  "shape": [
3759
- 384
 
3760
  ],
3761
  "filename_index": 1
3762
  }
3763
  ]
3764
  },
3765
- "h.4.attn.c_proj.bias": {
3766
  "type": "Distributed",
3767
  "shape": [
3768
- 768
 
3769
  ],
3770
  "dtype": "F32",
3771
  "chunks": [
3772
  {
3773
  "offsets": [
 
3774
  0
3775
  ],
3776
  "shape": [
3777
- 384
 
3778
  ],
3779
  "filename_index": 0
3780
  },
3781
  {
3782
  "offsets": [
3783
- 384
 
3784
  ],
3785
  "shape": [
3786
- 384
 
3787
  ],
3788
  "filename_index": 1
3789
  }
3790
  ]
3791
  },
3792
- "h.2.attn.c_attn.weight": {
3793
  "type": "Distributed",
3794
  "shape": [
3795
- 768,
3796
- 2304
3797
  ],
3798
  "dtype": "F32",
3799
  "chunks": [
3800
  {
3801
  "offsets": [
3802
- 0,
3803
  0
3804
  ],
3805
  "shape": [
3806
- 768,
3807
- 1152
3808
  ],
3809
  "filename_index": 0
3810
  },
3811
  {
3812
  "offsets": [
3813
- 0,
3814
- 1152
3815
  ],
3816
  "shape": [
3817
- 768,
3818
- 1152
3819
  ],
3820
  "filename_index": 1
3821
  }
3822
  ]
3823
  },
3824
- "h.0.ln_2.bias": {
3825
  "type": "Distributed",
3826
  "shape": [
3827
  768
@@ -3848,7 +3882,7 @@
3848
  }
3849
  ]
3850
  },
3851
- "h.9.attn.c_proj.bias": {
3852
  "type": "Distributed",
3853
  "shape": [
3854
  768
@@ -3875,7 +3909,7 @@
3875
  }
3876
  ]
3877
  },
3878
- "h.5.attn.c_proj.bias": {
3879
  "type": "Distributed",
3880
  "shape": [
3881
  768
@@ -3902,7 +3936,7 @@
3902
  }
3903
  ]
3904
  },
3905
- "h.9.ln_1.weight": {
3906
  "type": "Distributed",
3907
  "shape": [
3908
  768
@@ -3929,7 +3963,7 @@
3929
  }
3930
  ]
3931
  },
3932
- "h.4.mlp.c_proj.bias": {
3933
  "type": "Distributed",
3934
  "shape": [
3935
  768
@@ -3988,93 +4022,108 @@
3988
  }
3989
  ]
3990
  },
3991
- "h.7.attn.c_proj.weight": {
3992
  "type": "Distributed",
3993
  "shape": [
3994
- 768,
3995
  768
3996
  ],
3997
  "dtype": "F32",
3998
  "chunks": [
3999
  {
4000
  "offsets": [
4001
- 0,
4002
  0
4003
  ],
4004
  "shape": [
4005
- 384,
4006
- 768
4007
  ],
4008
  "filename_index": 0
4009
  },
4010
  {
4011
  "offsets": [
4012
- 384,
4013
- 0
4014
  ],
4015
  "shape": [
4016
- 384,
4017
- 768
4018
  ],
4019
  "filename_index": 1
4020
  }
4021
  ]
4022
  },
4023
- "h.2.mlp.c_fc.bias": {
4024
  "type": "Distributed",
4025
  "shape": [
4026
- 3072
 
4027
  ],
4028
  "dtype": "F32",
4029
  "chunks": [
4030
  {
4031
  "offsets": [
 
4032
  0
4033
  ],
4034
  "shape": [
4035
- 1536
 
4036
  ],
4037
  "filename_index": 0
4038
  },
4039
  {
4040
  "offsets": [
4041
- 1536
 
4042
  ],
4043
  "shape": [
4044
- 1536
 
4045
  ],
4046
  "filename_index": 1
4047
  }
4048
  ]
4049
  },
4050
- "h.7.mlp.c_fc.bias": {
4051
  "type": "Distributed",
4052
  "shape": [
4053
- 3072
 
 
 
4054
  ],
4055
  "dtype": "F32",
4056
  "chunks": [
4057
  {
4058
  "offsets": [
 
 
 
4059
  0
4060
  ],
4061
  "shape": [
4062
- 1536
 
 
 
4063
  ],
4064
  "filename_index": 0
4065
  },
4066
  {
4067
  "offsets": [
4068
- 1536
 
 
 
4069
  ],
4070
  "shape": [
4071
- 1536
 
 
 
4072
  ],
4073
  "filename_index": 1
4074
  }
4075
  ]
4076
  },
4077
- "h.2.ln_2.bias": {
4078
  "type": "Distributed",
4079
  "shape": [
4080
  768
@@ -4101,10 +4150,10 @@
4101
  }
4102
  ]
4103
  },
4104
- "h.6.attn.c_attn.bias": {
4105
  "type": "Distributed",
4106
  "shape": [
4107
- 2304
4108
  ],
4109
  "dtype": "F32",
4110
  "chunks": [
@@ -4113,49 +4162,54 @@
4113
  0
4114
  ],
4115
  "shape": [
4116
- 1152
4117
  ],
4118
  "filename_index": 0
4119
  },
4120
  {
4121
  "offsets": [
4122
- 1152
4123
  ],
4124
  "shape": [
4125
- 1152
4126
  ],
4127
  "filename_index": 1
4128
  }
4129
  ]
4130
  },
4131
- "h.6.mlp.c_fc.bias": {
4132
  "type": "Distributed",
4133
  "shape": [
4134
- 3072
 
4135
  ],
4136
  "dtype": "F32",
4137
  "chunks": [
4138
  {
4139
  "offsets": [
 
4140
  0
4141
  ],
4142
  "shape": [
4143
- 1536
 
4144
  ],
4145
  "filename_index": 0
4146
  },
4147
  {
4148
  "offsets": [
4149
- 1536
 
4150
  ],
4151
  "shape": [
4152
- 1536
 
4153
  ],
4154
  "filename_index": 1
4155
  }
4156
  ]
4157
  },
4158
- "h.0.mlp.c_fc.bias": {
4159
  "type": "Distributed",
4160
  "shape": [
4161
  3072
@@ -4182,43 +4236,38 @@
4182
  }
4183
  ]
4184
  },
4185
- "h.4.mlp.c_fc.weight": {
4186
  "type": "Distributed",
4187
  "shape": [
4188
- 768,
4189
- 3072
4190
  ],
4191
  "dtype": "F32",
4192
  "chunks": [
4193
  {
4194
  "offsets": [
4195
- 0,
4196
  0
4197
  ],
4198
  "shape": [
4199
- 768,
4200
- 1536
4201
  ],
4202
  "filename_index": 0
4203
  },
4204
  {
4205
  "offsets": [
4206
- 0,
4207
- 1536
4208
  ],
4209
  "shape": [
4210
- 768,
4211
- 1536
4212
  ],
4213
  "filename_index": 1
4214
  }
4215
  ]
4216
  },
4217
- "h.11.attn.c_proj.weight": {
4218
  "type": "Distributed",
4219
  "shape": [
4220
  768,
4221
- 768
4222
  ],
4223
  "dtype": "F32",
4224
  "chunks": [
@@ -4228,25 +4277,25 @@
4228
  0
4229
  ],
4230
  "shape": [
4231
- 384,
4232
- 768
4233
  ],
4234
  "filename_index": 0
4235
  },
4236
  {
4237
  "offsets": [
4238
- 384,
4239
- 0
4240
  ],
4241
  "shape": [
4242
- 384,
4243
- 768
4244
  ],
4245
  "filename_index": 1
4246
  }
4247
  ]
4248
  },
4249
- "h.6.ln_1.bias": {
4250
  "type": "Distributed",
4251
  "shape": [
4252
  768
@@ -4273,39 +4322,34 @@
4273
  }
4274
  ]
4275
  },
4276
- "h.0.mlp.c_proj.weight": {
4277
  "type": "Distributed",
4278
  "shape": [
4279
- 3072,
4280
- 768
4281
  ],
4282
  "dtype": "F32",
4283
  "chunks": [
4284
  {
4285
  "offsets": [
4286
- 0,
4287
  0
4288
  ],
4289
  "shape": [
4290
- 1536,
4291
- 768
4292
  ],
4293
  "filename_index": 0
4294
  },
4295
  {
4296
  "offsets": [
4297
- 1536,
4298
- 0
4299
  ],
4300
  "shape": [
4301
- 1536,
4302
- 768
4303
  ],
4304
  "filename_index": 1
4305
  }
4306
  ]
4307
  },
4308
- "h.8.attn.c_attn.bias": {
4309
  "type": "Distributed",
4310
  "shape": [
4311
  2304
@@ -4332,88 +4376,103 @@
4332
  }
4333
  ]
4334
  },
4335
- "h.7.ln_2.bias": {
4336
  "type": "Distributed",
4337
  "shape": [
 
4338
  768
4339
  ],
4340
  "dtype": "F32",
4341
  "chunks": [
4342
  {
4343
  "offsets": [
 
4344
  0
4345
  ],
4346
  "shape": [
4347
- 384
 
4348
  ],
4349
  "filename_index": 0
4350
  },
4351
  {
4352
  "offsets": [
4353
- 384
 
4354
  ],
4355
  "shape": [
4356
- 384
 
4357
  ],
4358
  "filename_index": 1
4359
  }
4360
  ]
4361
  },
4362
- "h.0.attn.c_attn.bias": {
4363
  "type": "Distributed",
4364
  "shape": [
 
4365
  2304
4366
  ],
4367
  "dtype": "F32",
4368
  "chunks": [
4369
  {
4370
  "offsets": [
 
4371
  0
4372
  ],
4373
  "shape": [
 
4374
  1152
4375
  ],
4376
  "filename_index": 0
4377
  },
4378
  {
4379
  "offsets": [
 
4380
  1152
4381
  ],
4382
  "shape": [
 
4383
  1152
4384
  ],
4385
  "filename_index": 1
4386
  }
4387
  ]
4388
  },
4389
- "h.9.mlp.c_proj.bias": {
4390
  "type": "Distributed",
4391
  "shape": [
4392
- 768
 
4393
  ],
4394
  "dtype": "F32",
4395
  "chunks": [
4396
  {
4397
  "offsets": [
 
4398
  0
4399
  ],
4400
  "shape": [
4401
- 384
 
4402
  ],
4403
  "filename_index": 0
4404
  },
4405
  {
4406
  "offsets": [
4407
- 384
 
4408
  ],
4409
  "shape": [
4410
- 384
 
4411
  ],
4412
  "filename_index": 1
4413
  }
4414
  ]
4415
  },
4416
- "h.11.ln_1.bias": {
4417
  "type": "Distributed",
4418
  "shape": [
4419
  768
@@ -4440,79 +4499,42 @@
4440
  }
4441
  ]
4442
  },
4443
- "h.4.attn.bias": {
4444
  "type": "Distributed",
4445
  "shape": [
4446
- 1,
4447
- 1,
4448
- 1024,
4449
- 1024
4450
  ],
4451
  "dtype": "F32",
4452
  "chunks": [
4453
  {
4454
  "offsets": [
4455
- 0,
4456
- 0,
4457
  0,
4458
  0
4459
  ],
4460
  "shape": [
4461
- 1,
4462
- 1,
4463
- 1024,
4464
- 512
4465
  ],
4466
  "filename_index": 0
4467
  },
4468
  {
4469
  "offsets": [
4470
- 0,
4471
- 0,
4472
- 0,
4473
- 512
4474
- ],
4475
- "shape": [
4476
- 1,
4477
- 1,
4478
- 1024,
4479
- 512
4480
- ],
4481
- "filename_index": 1
4482
- }
4483
- ]
4484
- },
4485
- "h.1.ln_1.weight": {
4486
- "type": "Distributed",
4487
- "shape": [
4488
- 768
4489
- ],
4490
- "dtype": "F32",
4491
- "chunks": [
4492
- {
4493
- "offsets": [
4494
  0
4495
  ],
4496
  "shape": [
4497
- 384
4498
- ],
4499
- "filename_index": 0
4500
- },
4501
- {
4502
- "offsets": [
4503
- 384
4504
- ],
4505
- "shape": [
4506
- 384
4507
  ],
4508
  "filename_index": 1
4509
  }
4510
  ]
4511
  },
4512
- "h.6.attn.c_proj.bias": {
4513
  "type": "Distributed",
4514
  "shape": [
4515
- 768
4516
  ],
4517
  "dtype": "F32",
4518
  "chunks": [
@@ -4521,22 +4543,22 @@
4521
  0
4522
  ],
4523
  "shape": [
4524
- 384
4525
  ],
4526
  "filename_index": 0
4527
  },
4528
  {
4529
  "offsets": [
4530
- 384
4531
  ],
4532
  "shape": [
4533
- 384
4534
  ],
4535
  "filename_index": 1
4536
  }
4537
  ]
4538
  },
4539
- "h.1.attn.c_attn.weight": {
4540
  "type": "Distributed",
4541
  "shape": [
4542
  768,
@@ -4568,7 +4590,7 @@
4568
  }
4569
  ]
4570
  },
4571
- "h.6.attn.bias": {
4572
  "type": "Distributed",
4573
  "shape": [
4574
  1,
@@ -4610,38 +4632,11 @@
4610
  }
4611
  ]
4612
  },
4613
- "h.6.mlp.c_proj.bias": {
4614
- "type": "Distributed",
4615
- "shape": [
4616
- 768
4617
- ],
4618
- "dtype": "F32",
4619
- "chunks": [
4620
- {
4621
- "offsets": [
4622
- 0
4623
- ],
4624
- "shape": [
4625
- 384
4626
- ],
4627
- "filename_index": 0
4628
- },
4629
- {
4630
- "offsets": [
4631
- 384
4632
- ],
4633
- "shape": [
4634
- 384
4635
- ],
4636
- "filename_index": 1
4637
- }
4638
- ]
4639
- },
4640
- "h.4.attn.c_attn.weight": {
4641
  "type": "Distributed",
4642
  "shape": [
4643
  768,
4644
- 2304
4645
  ],
4646
  "dtype": "F32",
4647
  "chunks": [
@@ -4651,25 +4646,25 @@
4651
  0
4652
  ],
4653
  "shape": [
4654
- 768,
4655
- 1152
4656
  ],
4657
  "filename_index": 0
4658
  },
4659
  {
4660
  "offsets": [
4661
- 0,
4662
- 1152
4663
  ],
4664
  "shape": [
4665
- 768,
4666
- 1152
4667
  ],
4668
  "filename_index": 1
4669
  }
4670
  ]
4671
  },
4672
- "h.11.attn.c_proj.bias": {
4673
  "type": "Distributed",
4674
  "shape": [
4675
  768
@@ -4696,7 +4691,7 @@
4696
  }
4697
  ]
4698
  },
4699
- "h.2.ln_1.bias": {
4700
  "type": "Distributed",
4701
  "shape": [
4702
  768
@@ -4723,28 +4718,33 @@
4723
  }
4724
  ]
4725
  },
4726
- "h.5.mlp.c_fc.bias": {
4727
  "type": "Distributed",
4728
  "shape": [
4729
- 3072
 
4730
  ],
4731
  "dtype": "F32",
4732
  "chunks": [
4733
  {
4734
  "offsets": [
 
4735
  0
4736
  ],
4737
  "shape": [
4738
- 1536
 
4739
  ],
4740
  "filename_index": 0
4741
  },
4742
  {
4743
  "offsets": [
4744
- 1536
 
4745
  ],
4746
  "shape": [
4747
- 1536
 
4748
  ],
4749
  "filename_index": 1
4750
  }
 
1
  {
2
  "tensors": {
3
+ "h.0.mlp.c_proj.weight": {
4
  "type": "Distributed",
5
  "shape": [
6
+ 3072,
7
  768
8
  ],
9
  "dtype": "F32",
10
  "chunks": [
11
  {
12
  "offsets": [
13
+ 0,
14
  0
15
  ],
16
  "shape": [
17
+ 1536,
18
+ 768
19
  ],
20
  "filename_index": 0
21
  },
22
  {
23
  "offsets": [
24
+ 1536,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  0
26
  ],
27
  "shape": [
28
+ 1536,
29
+ 768
 
 
 
 
 
 
 
 
30
  ],
31
  "filename_index": 1
32
  }
 
64
  }
65
  ]
66
  },
67
+ "h.2.mlp.c_fc.bias": {
68
  "type": "Distributed",
69
  "shape": [
70
+ 3072
 
 
 
71
  ],
72
  "dtype": "F32",
73
  "chunks": [
74
  {
75
  "offsets": [
 
 
 
76
  0
77
  ],
78
  "shape": [
79
+ 1536
 
 
 
80
  ],
81
  "filename_index": 0
82
  },
83
  {
84
  "offsets": [
85
+ 1536
 
 
 
86
  ],
87
  "shape": [
88
+ 1536
 
 
 
89
  ],
90
  "filename_index": 1
91
  }
92
  ]
93
  },
94
+ "h.8.ln_1.weight": {
95
  "type": "Distributed",
96
  "shape": [
97
+ 768
98
  ],
99
  "dtype": "F32",
100
  "chunks": [
 
103
  0
104
  ],
105
  "shape": [
106
+ 384
107
  ],
108
  "filename_index": 0
109
  },
110
  {
111
  "offsets": [
112
+ 384
113
  ],
114
  "shape": [
115
+ 384
116
  ],
117
  "filename_index": 1
118
  }
119
  ]
120
  },
121
+ "h.2.attn.c_attn.weight": {
122
  "type": "Distributed",
123
  "shape": [
124
  768,
125
+ 2304
126
  ],
127
  "dtype": "F32",
128
  "chunks": [
 
133
  ],
134
  "shape": [
135
  768,
136
+ 1152
137
  ],
138
  "filename_index": 0
139
  },
140
  {
141
  "offsets": [
142
  0,
143
+ 1152
144
  ],
145
  "shape": [
146
  768,
147
+ 1152
148
  ],
149
  "filename_index": 1
150
  }
151
  ]
152
  },
153
+ "h.8.attn.c_proj.bias": {
154
  "type": "Distributed",
155
  "shape": [
156
  768
 
177
  }
178
  ]
179
  },
180
+ "ln_f.bias": {
181
  "type": "Distributed",
182
  "shape": [
183
  768
 
204
  }
205
  ]
206
  },
207
+ "h.2.ln_2.weight": {
208
  "type": "Distributed",
209
  "shape": [
210
  768
 
231
  }
232
  ]
233
  },
234
+ "h.6.ln_1.bias": {
235
  "type": "Distributed",
236
  "shape": [
237
  768
 
258
  }
259
  ]
260
  },
261
+ "h.2.mlp.c_proj.weight": {
262
  "type": "Distributed",
263
  "shape": [
264
+ 3072,
265
+ 768
266
  ],
267
  "dtype": "F32",
268
  "chunks": [
 
272
  0
273
  ],
274
  "shape": [
275
+ 1536,
276
+ 768
277
  ],
278
  "filename_index": 0
279
  },
280
  {
281
  "offsets": [
282
+ 1536,
283
+ 0
284
  ],
285
  "shape": [
286
+ 1536,
287
+ 768
288
  ],
289
  "filename_index": 1
290
  }
291
  ]
292
  },
293
+ "h.10.mlp.c_fc.weight": {
294
  "type": "Distributed",
295
  "shape": [
296
+ 768,
297
+ 3072
298
  ],
299
  "dtype": "F32",
300
  "chunks": [
301
  {
302
  "offsets": [
303
+ 0,
304
  0
305
  ],
306
  "shape": [
307
+ 768,
308
+ 1536
309
  ],
310
  "filename_index": 0
311
  },
312
  {
313
  "offsets": [
314
+ 0,
315
+ 1536
316
  ],
317
  "shape": [
318
+ 768,
319
+ 1536
320
  ],
321
  "filename_index": 1
322
  }
323
  ]
324
  },
325
+ "h.8.ln_1.bias": {
326
  "type": "Distributed",
327
  "shape": [
328
  768
 
349
  }
350
  ]
351
  },
352
+ "h.9.attn.c_attn.bias": {
353
  "type": "Distributed",
354
  "shape": [
355
+ 2304
 
356
  ],
357
  "dtype": "F32",
358
  "chunks": [
359
  {
360
  "offsets": [
 
361
  0
362
  ],
363
  "shape": [
364
+ 1152
 
365
  ],
366
  "filename_index": 0
367
  },
368
  {
369
  "offsets": [
370
+ 1152
 
371
  ],
372
  "shape": [
373
+ 1152
 
374
  ],
375
  "filename_index": 1
376
  }
377
  ]
378
  },
379
+ "h.5.ln_2.bias": {
380
  "type": "Distributed",
381
  "shape": [
 
382
  768
383
  ],
384
  "dtype": "F32",
385
  "chunks": [
386
  {
387
  "offsets": [
 
388
  0
389
  ],
390
  "shape": [
391
+ 384
 
392
  ],
393
  "filename_index": 0
394
  },
395
  {
396
  "offsets": [
397
+ 384
 
398
  ],
399
  "shape": [
400
+ 384
 
401
  ],
402
  "filename_index": 1
403
  }
404
  ]
405
  },
406
+ "h.6.mlp.c_proj.weight": {
407
  "type": "Distributed",
408
  "shape": [
409
+ 3072,
410
  768
411
  ],
412
  "dtype": "F32",
 
417
  0
418
  ],
419
  "shape": [
420
+ 1536,
421
  768
422
  ],
423
  "filename_index": 0
424
  },
425
  {
426
  "offsets": [
427
+ 1536,
428
  0
429
  ],
430
  "shape": [
431
+ 1536,
432
  768
433
  ],
434
  "filename_index": 1
435
  }
436
  ]
437
  },
438
+ "h.7.mlp.c_proj.bias": {
439
  "type": "Distributed",
440
  "shape": [
441
  768
 
462
  }
463
  ]
464
  },
465
+ "h.1.mlp.c_fc.bias": {
466
  "type": "Distributed",
467
  "shape": [
468
+ 3072
469
  ],
470
  "dtype": "F32",
471
  "chunks": [
 
474
  0
475
  ],
476
  "shape": [
477
+ 1536
478
  ],
479
  "filename_index": 0
480
  },
481
  {
482
  "offsets": [
483
+ 1536
484
  ],
485
  "shape": [
486
+ 1536
487
  ],
488
  "filename_index": 1
489
  }
490
  ]
491
  },
492
+ "h.9.mlp.c_fc.bias": {
493
  "type": "Distributed",
494
  "shape": [
495
+ 3072
496
  ],
497
  "dtype": "F32",
498
  "chunks": [
 
501
  0
502
  ],
503
  "shape": [
504
+ 1536
505
  ],
506
  "filename_index": 0
507
  },
508
  {
509
  "offsets": [
510
+ 1536
511
  ],
512
  "shape": [
513
+ 1536
514
  ],
515
  "filename_index": 1
516
  }
517
  ]
518
  },
519
+ "h.5.mlp.c_fc.weight": {
520
  "type": "Distributed",
521
  "shape": [
522
  768,
523
+ 3072
524
  ],
525
  "dtype": "F32",
526
  "chunks": [
 
531
  ],
532
  "shape": [
533
  768,
534
+ 1536
535
  ],
536
  "filename_index": 0
537
  },
538
  {
539
  "offsets": [
540
  0,
541
+ 1536
542
  ],
543
  "shape": [
544
  768,
545
+ 1536
546
  ],
547
  "filename_index": 1
548
  }
549
  ]
550
  },
551
+ "h.7.mlp.c_fc.bias": {
552
  "type": "Distributed",
553
  "shape": [
554
+ 3072
 
555
  ],
556
  "dtype": "F32",
557
  "chunks": [
558
  {
559
  "offsets": [
 
560
  0
561
  ],
562
  "shape": [
563
+ 1536
 
564
  ],
565
  "filename_index": 0
566
  },
567
  {
568
  "offsets": [
569
+ 1536
 
570
  ],
571
  "shape": [
572
+ 1536
 
573
  ],
574
  "filename_index": 1
575
  }
576
  ]
577
  },
578
+ "h.4.attn.c_proj.bias": {
579
  "type": "Distributed",
580
  "shape": [
581
+ 768
 
582
  ],
583
  "dtype": "F32",
584
  "chunks": [
585
  {
586
  "offsets": [
 
587
  0
588
  ],
589
  "shape": [
590
+ 384
 
591
  ],
592
  "filename_index": 0
593
  },
594
  {
595
  "offsets": [
596
+ 384
 
597
  ],
598
  "shape": [
599
+ 384
 
600
  ],
601
  "filename_index": 1
602
  }
603
  ]
604
  },
605
+ "h.1.attn.c_proj.weight": {
606
  "type": "Distributed",
607
  "shape": [
608
  768,
 
634
  }
635
  ]
636
  },
637
+ "h.0.mlp.c_fc.weight": {
638
  "type": "Distributed",
639
  "shape": [
640
+ 768,
641
+ 3072
 
 
642
  ],
643
  "dtype": "F32",
644
  "chunks": [
645
  {
646
  "offsets": [
 
 
647
  0,
648
  0
649
  ],
650
  "shape": [
651
+ 768,
652
+ 1536
 
 
653
  ],
654
  "filename_index": 0
655
  },
656
  {
657
  "offsets": [
658
  0,
659
+ 1536
 
 
660
  ],
661
  "shape": [
662
+ 768,
663
+ 1536
 
 
664
  ],
665
  "filename_index": 1
666
  }
667
  ]
668
  },
669
+ "h.8.mlp.c_fc.weight": {
670
  "type": "Distributed",
671
  "shape": [
672
  768,
 
698
  }
699
  ]
700
  },
701
+ "h.5.ln_2.weight": {
702
  "type": "Distributed",
703
  "shape": [
704
+ 768
705
  ],
706
  "dtype": "F32",
707
  "chunks": [
 
710
  0
711
  ],
712
  "shape": [
713
+ 384
714
  ],
715
  "filename_index": 0
716
  },
717
  {
718
  "offsets": [
719
+ 384
720
  ],
721
  "shape": [
722
+ 384
723
  ],
724
  "filename_index": 1
725
  }
726
  ]
727
  },
728
+ "h.11.mlp.c_fc.weight": {
729
  "type": "Distributed",
730
  "shape": [
731
  768,
732
+ 3072
733
  ],
734
  "dtype": "F32",
735
  "chunks": [
 
740
  ],
741
  "shape": [
742
  768,
743
+ 1536
744
  ],
745
  "filename_index": 0
746
  },
747
  {
748
  "offsets": [
749
  0,
750
+ 1536
751
  ],
752
  "shape": [
753
  768,
754
+ 1536
755
  ],
756
  "filename_index": 1
757
  }
758
  ]
759
  },
760
+ "h.9.mlp.c_proj.bias": {
761
  "type": "Distributed",
762
  "shape": [
763
+ 768
 
764
  ],
765
  "dtype": "F32",
766
  "chunks": [
767
  {
768
  "offsets": [
 
769
  0
770
  ],
771
  "shape": [
772
+ 384
 
773
  ],
774
  "filename_index": 0
775
  },
776
  {
777
  "offsets": [
778
+ 384
 
779
  ],
780
  "shape": [
781
+ 384
 
782
  ],
783
  "filename_index": 1
784
  }
785
  ]
786
  },
787
+ "h.6.mlp.c_fc.bias": {
788
  "type": "Distributed",
789
  "shape": [
790
  3072
 
811
  }
812
  ]
813
  },
814
+ "h.5.mlp.c_proj.weight": {
815
  "type": "Distributed",
816
  "shape": [
817
+ 3072,
818
  768
819
  ],
820
  "dtype": "F32",
 
825
  0
826
  ],
827
  "shape": [
828
+ 1536,
829
  768
830
  ],
831
  "filename_index": 0
832
  },
833
  {
834
  "offsets": [
835
+ 1536,
836
  0
837
  ],
838
  "shape": [
839
+ 1536,
840
  768
841
  ],
842
  "filename_index": 1
843
  }
844
  ]
845
  },
846
+ "h.7.attn.c_proj.bias": {
847
  "type": "Distributed",
848
  "shape": [
849
  768
 
870
  }
871
  ]
872
  },
873
+ "h.6.attn.c_proj.weight": {
874
  "type": "Distributed",
875
  "shape": [
876
+ 768,
877
+ 768
878
  ],
879
  "dtype": "F32",
880
  "chunks": [
881
  {
882
  "offsets": [
883
+ 0,
884
  0
885
  ],
886
  "shape": [
887
+ 384,
888
+ 768
889
  ],
890
  "filename_index": 0
891
  },
892
  {
893
  "offsets": [
894
+ 384,
895
+ 0
896
  ],
897
  "shape": [
898
+ 384,
899
+ 768
900
  ],
901
  "filename_index": 1
902
  }
903
  ]
904
  },
905
+ "wpe.weight": {
906
  "type": "Distributed",
907
  "shape": [
908
+ 1024,
909
+ 768
910
  ],
911
  "dtype": "F32",
912
  "chunks": [
913
  {
914
  "offsets": [
915
+ 0,
916
  0
917
  ],
918
  "shape": [
919
+ 1024,
920
+ 384
921
  ],
922
  "filename_index": 0
923
  },
924
  {
925
  "offsets": [
926
+ 0,
927
+ 384
928
  ],
929
  "shape": [
930
+ 1024,
931
+ 384
932
  ],
933
  "filename_index": 1
934
  }
935
  ]
936
  },
937
+ "h.1.attn.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
  768
 
961
  }
962
  ]
963
  },
964
+ "h.3.mlp.c_proj.weight": {
965
  "type": "Distributed",
966
  "shape": [
967
+ 3072,
968
  768
969
  ],
970
  "dtype": "F32",
 
975
  0
976
  ],
977
  "shape": [
978
+ 1536,
979
  768
980
  ],
981
  "filename_index": 0
982
  },
983
  {
984
  "offsets": [
985
+ 1536,
986
  0
987
  ],
988
  "shape": [
989
+ 1536,
990
  768
991
  ],
992
  "filename_index": 1
993
  }
994
  ]
995
  },
996
+ "h.2.mlp.c_fc.weight": {
997
  "type": "Distributed",
998
  "shape": [
999
+ 768,
1000
+ 3072
1001
  ],
1002
  "dtype": "F32",
1003
  "chunks": [
 
1007
  0
1008
  ],
1009
  "shape": [
1010
+ 768,
1011
+ 1536
1012
  ],
1013
  "filename_index": 0
1014
  },
1015
  {
1016
  "offsets": [
1017
+ 0,
1018
+ 1536
1019
  ],
1020
  "shape": [
1021
+ 768,
1022
+ 1536
1023
  ],
1024
  "filename_index": 1
1025
  }
1026
  ]
1027
  },
1028
+ "h.1.attn.bias": {
1029
  "type": "Distributed",
1030
  "shape": [
1031
+ 1,
1032
+ 1,
1033
+ 1024,
1034
+ 1024
1035
  ],
1036
  "dtype": "F32",
1037
  "chunks": [
1038
  {
1039
  "offsets": [
1040
+ 0,
1041
+ 0,
1042
+ 0,
1043
  0
1044
  ],
1045
  "shape": [
1046
+ 1,
1047
+ 1,
1048
+ 1024,
1049
+ 512
1050
  ],
1051
  "filename_index": 0
1052
  },
1053
  {
1054
  "offsets": [
1055
+ 0,
1056
+ 0,
1057
+ 0,
1058
+ 512
1059
  ],
1060
  "shape": [
1061
+ 1,
1062
+ 1,
1063
+ 1024,
1064
+ 512
1065
  ],
1066
  "filename_index": 1
1067
  }
1068
  ]
1069
  },
1070
+ "h.4.ln_1.bias": {
1071
  "type": "Distributed",
1072
  "shape": [
1073
  768
 
1094
  }
1095
  ]
1096
  },
1097
+ "h.0.ln_1.weight": {
1098
  "type": "Distributed",
1099
  "shape": [
1100
  768
 
1121
  }
1122
  ]
1123
  },
1124
+ "h.2.mlp.c_proj.bias": {
1125
  "type": "Distributed",
1126
  "shape": [
1127
+ 768
 
 
 
1128
  ],
1129
  "dtype": "F32",
1130
  "chunks": [
1131
  {
1132
  "offsets": [
 
 
 
1133
  0
1134
  ],
1135
  "shape": [
1136
+ 384
 
 
 
1137
  ],
1138
  "filename_index": 0
1139
  },
1140
  {
1141
  "offsets": [
1142
+ 384
 
 
 
1143
  ],
1144
  "shape": [
1145
+ 384
 
 
 
1146
  ],
1147
  "filename_index": 1
1148
  }
1149
  ]
1150
  },
1151
+ "h.7.attn.c_proj.weight": {
1152
  "type": "Distributed",
1153
  "shape": [
1154
+ 768,
1155
+ 768
 
 
1156
  ],
1157
  "dtype": "F32",
1158
  "chunks": [
1159
  {
1160
  "offsets": [
 
 
1161
  0,
1162
  0
1163
  ],
1164
  "shape": [
1165
+ 384,
1166
+ 768
 
 
1167
  ],
1168
  "filename_index": 0
1169
  },
1170
  {
1171
  "offsets": [
1172
+ 384,
1173
+ 0
 
 
1174
  ],
1175
  "shape": [
1176
+ 384,
1177
+ 768
 
 
1178
  ],
1179
  "filename_index": 1
1180
  }
1181
  ]
1182
  },
1183
+ "h.11.ln_2.bias": {
1184
  "type": "Distributed",
1185
  "shape": [
1186
  768
 
1207
  }
1208
  ]
1209
  },
1210
+ "h.9.attn.c_proj.weight": {
1211
  "type": "Distributed",
1212
  "shape": [
1213
+ 768,
1214
  768
1215
  ],
1216
  "dtype": "F32",
 
1221
  0
1222
  ],
1223
  "shape": [
1224
+ 384,
1225
  768
1226
  ],
1227
  "filename_index": 0
1228
  },
1229
  {
1230
  "offsets": [
1231
+ 384,
1232
  0
1233
  ],
1234
  "shape": [
1235
+ 384,
1236
  768
1237
  ],
1238
  "filename_index": 1
1239
  }
1240
  ]
1241
  },
1242
+ "h.11.attn.c_proj.bias": {
1243
  "type": "Distributed",
1244
  "shape": [
1245
  768
 
1266
  }
1267
  ]
1268
  },
1269
+ "h.1.mlp.c_fc.weight": {
1270
  "type": "Distributed",
1271
  "shape": [
1272
+ 768,
1273
+ 3072
1274
  ],
1275
  "dtype": "F32",
1276
  "chunks": [
 
1280
  0
1281
  ],
1282
  "shape": [
1283
+ 768,
1284
+ 1536
1285
  ],
1286
  "filename_index": 0
1287
  },
1288
  {
1289
  "offsets": [
1290
+ 0,
1291
+ 1536
1292
  ],
1293
  "shape": [
1294
+ 768,
1295
+ 1536
1296
  ],
1297
  "filename_index": 1
1298
  }
1299
  ]
1300
  },
1301
+ "h.11.attn.c_attn.weight": {
1302
  "type": "Distributed",
1303
  "shape": [
1304
  768,
 
1330
  }
1331
  ]
1332
  },
1333
+ "h.3.mlp.c_fc.weight": {
1334
  "type": "Distributed",
1335
  "shape": [
1336
+ 768,
1337
+ 3072
1338
  ],
1339
  "dtype": "F32",
1340
  "chunks": [
1341
  {
1342
  "offsets": [
1343
+ 0,
1344
  0
1345
  ],
1346
  "shape": [
1347
+ 768,
1348
+ 1536
1349
  ],
1350
  "filename_index": 0
1351
  },
1352
  {
1353
  "offsets": [
1354
+ 0,
1355
+ 1536
1356
  ],
1357
  "shape": [
1358
+ 768,
1359
+ 1536
1360
  ],
1361
  "filename_index": 1
1362
  }
1363
  ]
1364
  },
1365
+ "h.9.ln_1.weight": {
1366
  "type": "Distributed",
1367
  "shape": [
1368
+ 768
 
1369
  ],
1370
  "dtype": "F32",
1371
  "chunks": [
1372
  {
1373
  "offsets": [
 
1374
  0
1375
  ],
1376
  "shape": [
1377
+ 384
 
1378
  ],
1379
  "filename_index": 0
1380
  },
1381
  {
1382
  "offsets": [
1383
+ 384
 
1384
  ],
1385
  "shape": [
1386
+ 384
 
1387
  ],
1388
  "filename_index": 1
1389
  }
1390
  ]
1391
  },
1392
+ "h.5.ln_1.weight": {
1393
  "type": "Distributed",
1394
  "shape": [
 
1395
  768
1396
  ],
1397
  "dtype": "F32",
1398
  "chunks": [
1399
  {
1400
  "offsets": [
 
1401
  0
1402
  ],
1403
  "shape": [
1404
+ 384
 
1405
  ],
1406
  "filename_index": 0
1407
  },
1408
  {
1409
  "offsets": [
1410
+ 384
 
1411
  ],
1412
  "shape": [
1413
+ 384
 
1414
  ],
1415
  "filename_index": 1
1416
  }
1417
  ]
1418
  },
1419
+ "h.4.attn.c_attn.bias": {
1420
  "type": "Distributed",
1421
  "shape": [
1422
+ 2304
1423
  ],
1424
  "dtype": "F32",
1425
  "chunks": [
 
1428
  0
1429
  ],
1430
  "shape": [
1431
+ 1152
1432
  ],
1433
  "filename_index": 0
1434
  },
1435
  {
1436
  "offsets": [
1437
+ 1152
1438
  ],
1439
  "shape": [
1440
+ 1152
1441
  ],
1442
  "filename_index": 1
1443
  }
1444
  ]
1445
  },
1446
+ "h.10.attn.c_attn.bias": {
1447
  "type": "Distributed",
1448
  "shape": [
1449
+ 2304
1450
  ],
1451
  "dtype": "F32",
1452
  "chunks": [
 
1455
  0
1456
  ],
1457
  "shape": [
1458
+ 1152
1459
  ],
1460
  "filename_index": 0
1461
  },
1462
  {
1463
  "offsets": [
1464
+ 1152
1465
  ],
1466
  "shape": [
1467
+ 1152
1468
  ],
1469
  "filename_index": 1
1470
  }
1471
  ]
1472
  },
1473
+ "wte.weight": {
1474
  "type": "Distributed",
1475
  "shape": [
1476
+ 50257,
1477
+ 768
1478
  ],
1479
  "dtype": "F32",
1480
  "chunks": [
1481
  {
1482
  "offsets": [
1483
+ 0,
1484
  0
1485
  ],
1486
  "shape": [
1487
+ 50257,
1488
+ 384
1489
  ],
1490
  "filename_index": 0
1491
  },
1492
  {
1493
  "offsets": [
1494
+ 0,
1495
+ 384
1496
  ],
1497
  "shape": [
1498
+ 50257,
1499
+ 384
1500
  ],
1501
  "filename_index": 1
1502
  }
1503
  ]
1504
  },
1505
+ "h.4.mlp.c_fc.weight": {
1506
  "type": "Distributed",
1507
  "shape": [
1508
  768,
 
1534
  }
1535
  ]
1536
  },
1537
+ "h.0.mlp.c_proj.bias": {
1538
  "type": "Distributed",
1539
  "shape": [
1540
  768
 
1561
  }
1562
  ]
1563
  },
1564
+ "h.10.mlp.c_fc.bias": {
1565
  "type": "Distributed",
1566
  "shape": [
1567
+ 3072
1568
  ],
1569
  "dtype": "F32",
1570
  "chunks": [
 
1573
  0
1574
  ],
1575
  "shape": [
1576
+ 1536
1577
  ],
1578
  "filename_index": 0
1579
  },
1580
  {
1581
  "offsets": [
1582
+ 1536
1583
  ],
1584
  "shape": [
1585
+ 1536
1586
  ],
1587
  "filename_index": 1
1588
  }
1589
  ]
1590
  },
1591
+ "ln_f.weight": {
1592
  "type": "Distributed",
1593
  "shape": [
 
1594
  768
1595
  ],
1596
  "dtype": "F32",
1597
  "chunks": [
1598
  {
1599
  "offsets": [
 
1600
  0
1601
  ],
1602
  "shape": [
1603
+ 384
 
1604
  ],
1605
  "filename_index": 0
1606
  },
1607
  {
1608
  "offsets": [
1609
+ 384
1610
+ ],
1611
+ "shape": [
1612
+ 384
1613
+ ],
1614
+ "filename_index": 1
1615
+ }
1616
+ ]
1617
+ },
1618
+ "h.2.ln_2.bias": {
1619
+ "type": "Distributed",
1620
+ "shape": [
1621
+ 768
1622
+ ],
1623
+ "dtype": "F32",
1624
+ "chunks": [
1625
+ {
1626
+ "offsets": [
1627
  0
1628
  ],
1629
  "shape": [
1630
+ 384
1631
+ ],
1632
+ "filename_index": 0
1633
+ },
1634
+ {
1635
+ "offsets": [
1636
+ 384
1637
+ ],
1638
+ "shape": [
1639
+ 384
1640
  ],
1641
  "filename_index": 1
1642
  }
 
1669
  }
1670
  ]
1671
  },
1672
+ "h.11.mlp.c_proj.weight": {
1673
  "type": "Distributed",
1674
  "shape": [
1675
  3072,
 
1701
  }
1702
  ]
1703
  },
1704
+ "h.7.attn.c_attn.bias": {
1705
  "type": "Distributed",
1706
  "shape": [
1707
+ 2304
 
1708
  ],
1709
  "dtype": "F32",
1710
  "chunks": [
1711
  {
1712
  "offsets": [
 
1713
  0
1714
  ],
1715
  "shape": [
1716
+ 1152
 
1717
  ],
1718
  "filename_index": 0
1719
  },
1720
  {
1721
  "offsets": [
1722
+ 1152
 
1723
  ],
1724
  "shape": [
1725
+ 1152
 
1726
  ],
1727
  "filename_index": 1
1728
  }
1729
  ]
1730
  },
1731
+ "h.4.mlp.c_fc.bias": {
1732
+ "type": "Distributed",
1733
+ "shape": [
1734
+ 3072
1735
+ ],
1736
+ "dtype": "F32",
1737
+ "chunks": [
1738
+ {
1739
+ "offsets": [
1740
+ 0
1741
+ ],
1742
+ "shape": [
1743
+ 1536
1744
+ ],
1745
+ "filename_index": 0
1746
+ },
1747
+ {
1748
+ "offsets": [
1749
+ 1536
1750
+ ],
1751
+ "shape": [
1752
+ 1536
1753
+ ],
1754
+ "filename_index": 1
1755
+ }
1756
+ ]
1757
+ },
1758
+ "h.6.mlp.c_proj.bias": {
1759
  "type": "Distributed",
1760
  "shape": [
1761
  768
 
1782
  }
1783
  ]
1784
  },
1785
+ "h.10.attn.c_proj.weight": {
1786
  "type": "Distributed",
1787
  "shape": [
1788
  768,
1789
+ 768
1790
  ],
1791
  "dtype": "F32",
1792
  "chunks": [
 
1796
  0
1797
  ],
1798
  "shape": [
1799
+ 384,
1800
+ 768
1801
  ],
1802
  "filename_index": 0
1803
  },
1804
  {
1805
  "offsets": [
1806
+ 384,
1807
+ 0
1808
  ],
1809
  "shape": [
1810
+ 384,
1811
+ 768
1812
  ],
1813
  "filename_index": 1
1814
  }
1815
  ]
1816
  },
1817
+ "h.10.ln_1.weight": {
1818
  "type": "Distributed",
1819
  "shape": [
1820
  768
 
1841
  }
1842
  ]
1843
  },
1844
+ "h.5.attn.c_proj.weight": {
1845
  "type": "Distributed",
1846
  "shape": [
1847
+ 768,
1848
  768
1849
  ],
1850
  "dtype": "F32",
1851
  "chunks": [
1852
  {
1853
  "offsets": [
1854
+ 0,
1855
  0
1856
  ],
1857
  "shape": [
1858
+ 384,
1859
+ 768
1860
  ],
1861
  "filename_index": 0
1862
  },
1863
  {
1864
  "offsets": [
1865
+ 384,
1866
+ 0
1867
  ],
1868
  "shape": [
1869
+ 384,
1870
+ 768
1871
  ],
1872
  "filename_index": 1
1873
  }
1874
  ]
1875
  },
1876
+ "h.8.ln_2.bias": {
1877
  "type": "Distributed",
1878
  "shape": [
1879
  768
 
1900
  }
1901
  ]
1902
  },
1903
+ "h.10.ln_1.bias": {
1904
  "type": "Distributed",
1905
  "shape": [
 
1906
  768
1907
  ],
1908
  "dtype": "F32",
1909
  "chunks": [
1910
  {
1911
  "offsets": [
 
1912
  0
1913
  ],
1914
  "shape": [
1915
+ 384
 
1916
  ],
1917
  "filename_index": 0
1918
  },
1919
  {
1920
  "offsets": [
1921
+ 384
 
1922
  ],
1923
  "shape": [
1924
+ 384
 
1925
  ],
1926
  "filename_index": 1
1927
  }
1928
  ]
1929
  },
1930
+ "h.4.mlp.c_proj.bias": {
1931
  "type": "Distributed",
1932
  "shape": [
1933
+ 768
 
 
 
1934
  ],
1935
  "dtype": "F32",
1936
  "chunks": [
1937
  {
1938
  "offsets": [
 
 
 
1939
  0
1940
  ],
1941
  "shape": [
1942
+ 384
 
 
 
1943
  ],
1944
  "filename_index": 0
1945
  },
1946
  {
1947
  "offsets": [
1948
+ 384
 
 
 
1949
  ],
1950
  "shape": [
1951
+ 384
 
 
 
1952
  ],
1953
  "filename_index": 1
1954
  }
1955
  ]
1956
  },
1957
+ "h.5.ln_1.bias": {
1958
  "type": "Distributed",
1959
  "shape": [
1960
+ 768
 
1961
  ],
1962
  "dtype": "F32",
1963
  "chunks": [
1964
  {
1965
  "offsets": [
 
1966
  0
1967
  ],
1968
  "shape": [
1969
+ 384
 
1970
  ],
1971
  "filename_index": 0
1972
  },
1973
  {
1974
  "offsets": [
1975
+ 384
 
1976
  ],
1977
  "shape": [
1978
+ 384
 
1979
  ],
1980
  "filename_index": 1
1981
  }
1982
  ]
1983
  },
1984
+ "h.9.ln_2.bias": {
1985
  "type": "Distributed",
1986
  "shape": [
1987
  768
 
2008
  }
2009
  ]
2010
  },
2011
+ "h.1.attn.c_attn.bias": {
2012
  "type": "Distributed",
2013
  "shape": [
2014
+ 2304
 
 
 
2015
  ],
2016
  "dtype": "F32",
2017
  "chunks": [
2018
  {
2019
  "offsets": [
 
 
 
2020
  0
2021
  ],
2022
  "shape": [
2023
+ 1152
 
 
 
2024
  ],
2025
  "filename_index": 0
2026
  },
2027
  {
2028
  "offsets": [
2029
+ 1152
 
 
 
2030
  ],
2031
  "shape": [
2032
+ 1152
 
 
 
2033
  ],
2034
  "filename_index": 1
2035
  }
2036
  ]
2037
  },
2038
+ "h.0.attn.c_attn.bias": {
2039
  "type": "Distributed",
2040
  "shape": [
2041
+ 2304
 
2042
  ],
2043
  "dtype": "F32",
2044
  "chunks": [
2045
  {
2046
  "offsets": [
 
2047
  0
2048
  ],
2049
  "shape": [
2050
+ 1152
 
2051
  ],
2052
  "filename_index": 0
2053
  },
2054
  {
2055
  "offsets": [
2056
+ 1152
 
2057
  ],
2058
  "shape": [
2059
+ 1152
 
2060
  ],
2061
  "filename_index": 1
2062
  }
2063
  ]
2064
  },
2065
+ "h.9.ln_2.weight": {
2066
  "type": "Distributed",
2067
  "shape": [
2068
+ 768
 
2069
  ],
2070
  "dtype": "F32",
2071
  "chunks": [
2072
  {
2073
  "offsets": [
 
2074
  0
2075
  ],
2076
  "shape": [
2077
+ 384
 
2078
  ],
2079
  "filename_index": 0
2080
  },
2081
  {
2082
  "offsets": [
2083
+ 384
 
2084
  ],
2085
  "shape": [
2086
+ 384
 
2087
  ],
2088
  "filename_index": 1
2089
  }
2090
  ]
2091
  },
2092
+ "h.6.ln_2.weight": {
2093
  "type": "Distributed",
2094
  "shape": [
2095
  768
 
2116
  }
2117
  ]
2118
  },
2119
+ "h.10.ln_2.bias": {
2120
  "type": "Distributed",
2121
  "shape": [
2122
  768
 
2143
  }
2144
  ]
2145
  },
2146
+ "h.10.mlp.c_proj.bias": {
2147
  "type": "Distributed",
2148
  "shape": [
2149
  768
 
2170
  }
2171
  ]
2172
  },
2173
+ "h.2.ln_1.bias": {
2174
  "type": "Distributed",
2175
  "shape": [
2176
+ 768
 
 
 
2177
  ],
2178
  "dtype": "F32",
2179
  "chunks": [
2180
  {
2181
  "offsets": [
 
 
 
2182
  0
2183
  ],
2184
  "shape": [
2185
+ 384
 
 
 
2186
  ],
2187
  "filename_index": 0
2188
  },
2189
  {
2190
  "offsets": [
2191
+ 384
 
 
 
2192
  ],
2193
  "shape": [
2194
+ 384
 
 
 
2195
  ],
2196
  "filename_index": 1
2197
  }
2198
  ]
2199
  },
2200
+ "h.7.mlp.c_proj.weight": {
2201
  "type": "Distributed",
2202
  "shape": [
2203
+ 3072,
2204
  768
2205
  ],
2206
  "dtype": "F32",
2207
  "chunks": [
2208
  {
2209
  "offsets": [
2210
+ 0,
2211
  0
2212
  ],
2213
  "shape": [
2214
+ 1536,
2215
+ 768
2216
  ],
2217
  "filename_index": 0
2218
  },
2219
  {
2220
  "offsets": [
2221
+ 1536,
2222
+ 0
2223
  ],
2224
  "shape": [
2225
+ 1536,
2226
+ 768
2227
  ],
2228
  "filename_index": 1
2229
  }
2230
  ]
2231
  },
2232
+ "h.3.mlp.c_proj.bias": {
2233
  "type": "Distributed",
2234
  "shape": [
2235
  768
 
2256
  }
2257
  ]
2258
  },
2259
+ "h.1.ln_1.bias": {
2260
  "type": "Distributed",
2261
  "shape": [
2262
  768
 
2283
  }
2284
  ]
2285
  },
2286
+ "h.0.ln_2.bias": {
2287
  "type": "Distributed",
2288
  "shape": [
2289
  768
 
2310
  }
2311
  ]
2312
  },
2313
+ "h.4.ln_2.bias": {
2314
  "type": "Distributed",
2315
  "shape": [
2316
  768
 
2337
  }
2338
  ]
2339
  },
2340
+ "h.6.attn.c_attn.weight": {
2341
  "type": "Distributed",
2342
  "shape": [
2343
+ 768,
2344
+ 2304
2345
  ],
2346
  "dtype": "F32",
2347
  "chunks": [
2348
  {
2349
  "offsets": [
2350
+ 0,
2351
  0
2352
  ],
2353
  "shape": [
2354
+ 768,
2355
+ 1152
2356
  ],
2357
  "filename_index": 0
2358
  },
2359
  {
2360
  "offsets": [
2361
+ 0,
2362
+ 1152
2363
  ],
2364
  "shape": [
2365
+ 768,
2366
+ 1152
2367
  ],
2368
  "filename_index": 1
2369
  }
2370
  ]
2371
  },
2372
+ "h.6.attn.bias": {
2373
+ "type": "Distributed",
2374
+ "shape": [
2375
+ 1,
2376
+ 1,
2377
+ 1024,
2378
+ 1024
2379
+ ],
2380
+ "dtype": "F32",
2381
+ "chunks": [
2382
+ {
2383
+ "offsets": [
2384
+ 0,
2385
+ 0,
2386
+ 0,
2387
+ 0
2388
+ ],
2389
+ "shape": [
2390
+ 1,
2391
+ 1,
2392
+ 1024,
2393
+ 512
2394
+ ],
2395
+ "filename_index": 0
2396
+ },
2397
+ {
2398
+ "offsets": [
2399
+ 0,
2400
+ 0,
2401
+ 0,
2402
+ 512
2403
+ ],
2404
+ "shape": [
2405
+ 1,
2406
+ 1,
2407
+ 1024,
2408
+ 512
2409
+ ],
2410
+ "filename_index": 1
2411
+ }
2412
+ ]
2413
+ },
2414
+ "h.9.mlp.c_proj.weight": {
2415
  "type": "Distributed",
2416
  "shape": [
2417
  3072,
 
2443
  }
2444
  ]
2445
  },
2446
+ "h.7.attn.c_attn.weight": {
2447
  "type": "Distributed",
2448
  "shape": [
2449
  768,
2450
+ 2304
2451
  ],
2452
  "dtype": "F32",
2453
  "chunks": [
 
2458
  ],
2459
  "shape": [
2460
  768,
2461
+ 1152
2462
  ],
2463
  "filename_index": 0
2464
  },
2465
  {
2466
  "offsets": [
2467
  0,
2468
+ 1152
2469
  ],
2470
  "shape": [
2471
  768,
2472
+ 1152
2473
  ],
2474
  "filename_index": 1
2475
  }
2476
  ]
2477
  },
2478
+ "h.3.ln_2.weight": {
2479
  "type": "Distributed",
2480
  "shape": [
 
2481
  768
2482
  ],
2483
  "dtype": "F32",
2484
  "chunks": [
2485
  {
2486
  "offsets": [
 
2487
  0
2488
  ],
2489
  "shape": [
2490
+ 384
 
2491
  ],
2492
  "filename_index": 0
2493
  },
2494
  {
2495
  "offsets": [
2496
+ 384
 
2497
  ],
2498
  "shape": [
2499
+ 384
 
2500
  ],
2501
  "filename_index": 1
2502
  }
2503
  ]
2504
  },
2505
+ "h.0.ln_2.weight": {
2506
  "type": "Distributed",
2507
  "shape": [
2508
  768
 
2529
  }
2530
  ]
2531
  },
2532
+ "h.3.ln_1.weight": {
2533
  "type": "Distributed",
2534
  "shape": [
2535
+ 768
2536
  ],
2537
  "dtype": "F32",
2538
  "chunks": [
 
2541
  0
2542
  ],
2543
  "shape": [
2544
+ 384
2545
  ],
2546
  "filename_index": 0
2547
  },
2548
  {
2549
  "offsets": [
2550
+ 384
2551
  ],
2552
  "shape": [
2553
+ 384
2554
  ],
2555
  "filename_index": 1
2556
  }
2557
  ]
2558
  },
2559
+ "h.11.attn.c_attn.bias": {
2560
  "type": "Distributed",
2561
  "shape": [
2562
+ 2304
2563
  ],
2564
  "dtype": "F32",
2565
  "chunks": [
 
2568
  0
2569
  ],
2570
  "shape": [
2571
+ 1152
2572
  ],
2573
  "filename_index": 0
2574
  },
2575
  {
2576
  "offsets": [
2577
+ 1152
2578
  ],
2579
  "shape": [
2580
+ 1152
2581
  ],
2582
  "filename_index": 1
2583
  }
2584
  ]
2585
  },
2586
+ "h.1.ln_1.weight": {
2587
  "type": "Distributed",
2588
  "shape": [
 
2589
  768
2590
  ],
2591
  "dtype": "F32",
2592
  "chunks": [
2593
  {
2594
  "offsets": [
 
2595
  0
2596
  ],
2597
  "shape": [
2598
+ 384
 
2599
  ],
2600
  "filename_index": 0
2601
  },
2602
  {
2603
  "offsets": [
2604
+ 384
 
2605
  ],
2606
  "shape": [
2607
+ 384
 
2608
  ],
2609
  "filename_index": 1
2610
  }
2611
  ]
2612
  },
2613
+ "h.8.attn.bias": {
2614
  "type": "Distributed",
2615
  "shape": [
2616
+ 1,
2617
+ 1,
2618
+ 1024,
2619
+ 1024
2620
  ],
2621
  "dtype": "F32",
2622
  "chunks": [
2623
  {
2624
  "offsets": [
2625
+ 0,
2626
+ 0,
2627
+ 0,
2628
  0
2629
  ],
2630
  "shape": [
2631
+ 1,
2632
+ 1,
2633
+ 1024,
2634
+ 512
2635
  ],
2636
  "filename_index": 0
2637
  },
2638
  {
2639
  "offsets": [
2640
+ 0,
2641
+ 0,
2642
+ 0,
2643
+ 512
2644
  ],
2645
  "shape": [
2646
+ 1,
2647
+ 1,
2648
+ 1024,
2649
+ 512
2650
  ],
2651
  "filename_index": 1
2652
  }
2653
  ]
2654
  },
2655
+ "h.2.attn.c_proj.bias": {
2656
  "type": "Distributed",
2657
  "shape": [
2658
  768
 
2679
  }
2680
  ]
2681
  },
2682
+ "h.7.ln_2.bias": {
2683
  "type": "Distributed",
2684
  "shape": [
2685
  768
 
2706
  }
2707
  ]
2708
  },
2709
+ "h.7.mlp.c_fc.weight": {
2710
  "type": "Distributed",
2711
  "shape": [
2712
+ 768,
2713
  3072
2714
  ],
2715
  "dtype": "F32",
2716
  "chunks": [
2717
  {
2718
  "offsets": [
2719
+ 0,
2720
  0
2721
  ],
2722
  "shape": [
2723
+ 768,
2724
  1536
2725
  ],
2726
  "filename_index": 0
2727
  },
2728
  {
2729
  "offsets": [
2730
+ 0,
2731
  1536
2732
  ],
2733
  "shape": [
2734
+ 768,
2735
  1536
2736
  ],
2737
  "filename_index": 1
2738
  }
2739
  ]
2740
  },
2741
+ "h.10.attn.bias": {
2742
  "type": "Distributed",
2743
  "shape": [
2744
+ 1,
2745
+ 1,
2746
+ 1024,
2747
+ 1024
2748
  ],
2749
  "dtype": "F32",
2750
  "chunks": [
2751
  {
2752
  "offsets": [
2753
+ 0,
2754
+ 0,
2755
+ 0,
2756
  0
2757
  ],
2758
  "shape": [
2759
+ 1,
2760
+ 1,
2761
+ 1024,
2762
+ 512
2763
  ],
2764
  "filename_index": 0
2765
  },
2766
  {
2767
  "offsets": [
2768
+ 0,
2769
+ 0,
2770
+ 0,
2771
+ 512
2772
  ],
2773
  "shape": [
2774
+ 1,
2775
+ 1,
2776
+ 1024,
2777
+ 512
2778
  ],
2779
  "filename_index": 1
2780
  }
2781
  ]
2782
  },
2783
+ "h.7.ln_1.bias": {
2784
  "type": "Distributed",
2785
  "shape": [
 
2786
  768
2787
  ],
2788
  "dtype": "F32",
2789
  "chunks": [
2790
  {
2791
  "offsets": [
 
2792
  0
2793
  ],
2794
  "shape": [
2795
+ 384
 
2796
  ],
2797
  "filename_index": 0
2798
  },
2799
  {
2800
  "offsets": [
2801
+ 384
 
2802
  ],
2803
  "shape": [
2804
+ 384
 
2805
  ],
2806
  "filename_index": 1
2807
  }
2808
  ]
2809
  },
2810
+ "h.0.attn.bias": {
2811
  "type": "Distributed",
2812
  "shape": [
2813
+ 1,
2814
+ 1,
2815
+ 1024,
2816
+ 1024
2817
  ],
2818
  "dtype": "F32",
2819
  "chunks": [
2820
  {
2821
  "offsets": [
2822
+ 0,
2823
+ 0,
2824
+ 0,
2825
  0
2826
  ],
2827
  "shape": [
2828
+ 1,
2829
+ 1,
2830
+ 1024,
2831
+ 512
2832
  ],
2833
  "filename_index": 0
2834
  },
2835
  {
2836
  "offsets": [
2837
+ 0,
2838
+ 0,
2839
+ 0,
2840
+ 512
2841
  ],
2842
  "shape": [
2843
+ 1,
2844
+ 1,
2845
+ 1024,
2846
+ 512
2847
  ],
2848
  "filename_index": 1
2849
  }
2850
  ]
2851
  },
2852
+ "h.6.mlp.c_fc.weight": {
2853
  "type": "Distributed",
2854
  "shape": [
2855
+ 768,
2856
+ 3072
2857
  ],
2858
  "dtype": "F32",
2859
  "chunks": [
2860
  {
2861
  "offsets": [
2862
+ 0,
2863
  0
2864
  ],
2865
  "shape": [
2866
+ 768,
2867
+ 1536
2868
  ],
2869
  "filename_index": 0
2870
  },
2871
  {
2872
  "offsets": [
2873
+ 0,
2874
+ 1536
2875
  ],
2876
  "shape": [
2877
+ 768,
2878
+ 1536
2879
  ],
2880
  "filename_index": 1
2881
  }
2882
  ]
2883
  },
2884
+ "h.7.ln_1.weight": {
2885
  "type": "Distributed",
2886
  "shape": [
2887
  768
 
2908
  }
2909
  ]
2910
  },
2911
+ "h.3.mlp.c_fc.bias": {
2912
  "type": "Distributed",
2913
  "shape": [
2914
+ 3072
2915
  ],
2916
  "dtype": "F32",
2917
  "chunks": [
 
2920
  0
2921
  ],
2922
  "shape": [
2923
+ 1536
2924
  ],
2925
  "filename_index": 0
2926
  },
2927
  {
2928
  "offsets": [
2929
+ 1536
2930
  ],
2931
  "shape": [
2932
+ 1536
2933
  ],
2934
  "filename_index": 1
2935
  }
2936
  ]
2937
  },
2938
+ "h.5.attn.c_proj.bias": {
2939
  "type": "Distributed",
2940
  "shape": [
2941
+ 768
2942
  ],
2943
  "dtype": "F32",
2944
  "chunks": [
 
2947
  0
2948
  ],
2949
  "shape": [
2950
+ 384
2951
  ],
2952
  "filename_index": 0
2953
  },
2954
  {
2955
  "offsets": [
2956
+ 384
2957
  ],
2958
  "shape": [
2959
+ 384
2960
  ],
2961
  "filename_index": 1
2962
  }
2963
  ]
2964
  },
2965
+ "h.11.attn.bias": {
2966
  "type": "Distributed",
2967
  "shape": [
2968
+ 1,
2969
+ 1,
2970
+ 1024,
2971
+ 1024
2972
  ],
2973
  "dtype": "F32",
2974
  "chunks": [
2975
  {
2976
  "offsets": [
2977
+ 0,
2978
+ 0,
2979
+ 0,
2980
  0
2981
  ],
2982
  "shape": [
2983
+ 1,
2984
+ 1,
2985
+ 1024,
2986
+ 512
2987
  ],
2988
  "filename_index": 0
2989
  },
2990
  {
2991
  "offsets": [
2992
+ 0,
2993
+ 0,
2994
+ 0,
2995
+ 512
2996
  ],
2997
  "shape": [
2998
+ 1,
2999
+ 1,
3000
+ 1024,
3001
+ 512
3002
  ],
3003
  "filename_index": 1
3004
  }
3005
  ]
3006
  },
3007
+ "h.8.attn.c_attn.weight": {
3008
  "type": "Distributed",
3009
  "shape": [
3010
+ 768,
3011
  2304
3012
  ],
3013
  "dtype": "F32",
3014
  "chunks": [
3015
  {
3016
  "offsets": [
3017
+ 0,
3018
  0
3019
  ],
3020
  "shape": [
3021
+ 768,
3022
  1152
3023
  ],
3024
  "filename_index": 0
3025
  },
3026
  {
3027
  "offsets": [
3028
+ 0,
3029
  1152
3030
  ],
3031
  "shape": [
3032
+ 768,
3033
  1152
3034
  ],
3035
  "filename_index": 1
3036
  }
3037
  ]
3038
  },
3039
+ "h.9.attn.c_proj.bias": {
3040
  "type": "Distributed",
3041
  "shape": [
3042
  768
 
3063
  }
3064
  ]
3065
  },
3066
+ "h.7.attn.bias": {
3067
  "type": "Distributed",
3068
  "shape": [
3069
+ 1,
3070
+ 1,
3071
+ 1024,
3072
+ 1024
3073
  ],
3074
  "dtype": "F32",
3075
  "chunks": [
3076
  {
3077
  "offsets": [
3078
+ 0,
3079
+ 0,
3080
+ 0,
3081
  0
3082
  ],
3083
  "shape": [
3084
+ 1,
3085
+ 1,
3086
+ 1024,
3087
+ 512
3088
  ],
3089
  "filename_index": 0
3090
  },
3091
  {
3092
  "offsets": [
3093
+ 0,
3094
+ 0,
3095
+ 0,
3096
+ 512
3097
  ],
3098
  "shape": [
3099
+ 1,
3100
+ 1,
3101
+ 1024,
3102
+ 512
3103
  ],
3104
  "filename_index": 1
3105
  }
3106
  ]
3107
  },
3108
+ "h.3.attn.c_proj.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
  768,
 
3137
  }
3138
  ]
3139
  },
3140
+ "h.11.mlp.c_proj.bias": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
  768
 
3164
  }
3165
  ]
3166
  },
3167
+ "h.3.attn.c_proj.bias": {
3168
  "type": "Distributed",
3169
  "shape": [
3170
  768
 
3191
  }
3192
  ]
3193
  },
3194
+ "h.7.ln_2.weight": {
3195
  "type": "Distributed",
3196
  "shape": [
3197
  768
 
3218
  }
3219
  ]
3220
  },
3221
+ "h.9.ln_1.bias": {
3222
  "type": "Distributed",
3223
  "shape": [
 
3224
  768
3225
  ],
3226
  "dtype": "F32",
3227
  "chunks": [
3228
  {
3229
  "offsets": [
 
3230
  0
3231
  ],
3232
  "shape": [
3233
+ 384
 
3234
  ],
3235
  "filename_index": 0
3236
  },
3237
  {
3238
  "offsets": [
3239
+ 384
 
3240
  ],
3241
  "shape": [
3242
+ 384
 
3243
  ],
3244
  "filename_index": 1
3245
  }
3246
  ]
3247
  },
3248
+ "h.11.ln_1.weight": {
3249
  "type": "Distributed",
3250
  "shape": [
 
3251
  768
3252
  ],
3253
  "dtype": "F32",
3254
  "chunks": [
3255
  {
3256
  "offsets": [
 
3257
  0
3258
  ],
3259
  "shape": [
3260
+ 384
 
3261
  ],
3262
  "filename_index": 0
3263
  },
3264
  {
3265
  "offsets": [
3266
+ 384
 
3267
  ],
3268
  "shape": [
3269
+ 384
 
3270
  ],
3271
  "filename_index": 1
3272
  }
3273
  ]
3274
  },
3275
+ "h.5.mlp.c_fc.bias": {
3276
  "type": "Distributed",
3277
  "shape": [
3278
+ 3072
3279
  ],
3280
  "dtype": "F32",
3281
  "chunks": [
 
3284
  0
3285
  ],
3286
  "shape": [
3287
+ 1536
3288
  ],
3289
  "filename_index": 0
3290
  },
3291
  {
3292
  "offsets": [
3293
+ 1536
3294
  ],
3295
  "shape": [
3296
+ 1536
3297
  ],
3298
  "filename_index": 1
3299
  }
3300
  ]
3301
  },
3302
+ "h.1.ln_2.bias": {
3303
  "type": "Distributed",
3304
  "shape": [
3305
  768
 
3326
  }
3327
  ]
3328
  },
3329
+ "h.8.attn.c_attn.bias": {
3330
  "type": "Distributed",
3331
  "shape": [
3332
+ 2304
3333
  ],
3334
  "dtype": "F32",
3335
  "chunks": [
 
3338
  0
3339
  ],
3340
  "shape": [
3341
+ 1152
3342
  ],
3343
  "filename_index": 0
3344
  },
3345
  {
3346
  "offsets": [
3347
+ 1152
3348
  ],
3349
  "shape": [
3350
+ 1152
3351
  ],
3352
  "filename_index": 1
3353
  }
3354
  ]
3355
  },
3356
+ "h.8.mlp.c_proj.bias": {
3357
  "type": "Distributed",
3358
  "shape": [
 
3359
  768
3360
  ],
3361
  "dtype": "F32",
3362
  "chunks": [
3363
  {
3364
  "offsets": [
 
3365
  0
3366
  ],
3367
  "shape": [
 
3368
  384
3369
  ],
3370
  "filename_index": 0
3371
  },
3372
  {
3373
  "offsets": [
 
3374
  384
3375
  ],
3376
  "shape": [
 
3377
  384
3378
  ],
3379
  "filename_index": 1
3380
  }
3381
  ]
3382
  },
3383
+ "h.5.attn.c_attn.weight": {
3384
  "type": "Distributed",
3385
  "shape": [
3386
  768,
3387
+ 2304
3388
  ],
3389
  "dtype": "F32",
3390
  "chunks": [
 
3395
  ],
3396
  "shape": [
3397
  768,
3398
+ 1152
3399
  ],
3400
  "filename_index": 0
3401
  },
3402
  {
3403
  "offsets": [
3404
  0,
3405
+ 1152
3406
  ],
3407
  "shape": [
3408
  768,
3409
+ 1152
3410
  ],
3411
  "filename_index": 1
3412
  }
3413
  ]
3414
  },
3415
+ "h.4.attn.bias": {
3416
  "type": "Distributed",
3417
  "shape": [
3418
  1,
 
3454
  }
3455
  ]
3456
  },
3457
+ "h.5.mlp.c_proj.bias": {
3458
  "type": "Distributed",
3459
  "shape": [
3460
  768
 
3481
  }
3482
  ]
3483
  },
3484
+ "h.0.mlp.c_fc.bias": {
3485
  "type": "Distributed",
3486
  "shape": [
3487
+ 3072
3488
  ],
3489
  "dtype": "F32",
3490
  "chunks": [
 
3493
  0
3494
  ],
3495
  "shape": [
3496
+ 1536
3497
  ],
3498
  "filename_index": 0
3499
  },
3500
  {
3501
  "offsets": [
3502
+ 1536
3503
  ],
3504
  "shape": [
3505
+ 1536
3506
  ],
3507
  "filename_index": 1
3508
  }
3509
  ]
3510
  },
3511
+ "h.1.mlp.c_proj.weight": {
3512
  "type": "Distributed",
3513
  "shape": [
3514
+ 3072,
3515
+ 768
 
 
3516
  ],
3517
  "dtype": "F32",
3518
  "chunks": [
3519
  {
3520
  "offsets": [
 
 
3521
  0,
3522
  0
3523
  ],
3524
  "shape": [
3525
+ 1536,
3526
+ 768
 
 
3527
  ],
3528
  "filename_index": 0
3529
  },
3530
  {
3531
  "offsets": [
3532
+ 1536,
3533
+ 0
 
 
3534
  ],
3535
  "shape": [
3536
+ 1536,
3537
+ 768
 
 
3538
  ],
3539
  "filename_index": 1
3540
  }
3541
  ]
3542
  },
3543
+ "h.6.ln_1.weight": {
3544
  "type": "Distributed",
3545
  "shape": [
3546
+ 768
 
3547
  ],
3548
  "dtype": "F32",
3549
  "chunks": [
3550
  {
3551
  "offsets": [
 
3552
  0
3553
  ],
3554
  "shape": [
3555
+ 384
 
3556
  ],
3557
  "filename_index": 0
3558
  },
3559
  {
3560
  "offsets": [
3561
+ 384
 
3562
  ],
3563
  "shape": [
3564
+ 384
 
3565
  ],
3566
  "filename_index": 1
3567
  }
3568
  ]
3569
  },
3570
+ "h.5.attn.bias": {
3571
  "type": "Distributed",
3572
  "shape": [
3573
+ 1,
3574
+ 1,
3575
+ 1024,
3576
+ 1024
3577
  ],
3578
  "dtype": "F32",
3579
  "chunks": [
3580
  {
3581
  "offsets": [
3582
+ 0,
3583
+ 0,
3584
+ 0,
3585
  0
3586
  ],
3587
  "shape": [
3588
+ 1,
3589
+ 1,
3590
+ 1024,
3591
+ 512
3592
  ],
3593
  "filename_index": 0
3594
  },
3595
  {
3596
  "offsets": [
3597
+ 0,
3598
+ 0,
3599
+ 0,
3600
+ 512
3601
  ],
3602
  "shape": [
3603
+ 1,
3604
+ 1,
3605
+ 1024,
3606
+ 512
3607
  ],
3608
  "filename_index": 1
3609
  }
3610
  ]
3611
  },
3612
+ "h.2.attn.bias": {
3613
  "type": "Distributed",
3614
  "shape": [
3615
+ 1,
3616
+ 1,
3617
+ 1024,
3618
+ 1024
3619
  ],
3620
  "dtype": "F32",
3621
  "chunks": [
3622
  {
3623
  "offsets": [
3624
+ 0,
3625
+ 0,
3626
+ 0,
3627
  0
3628
  ],
3629
  "shape": [
3630
+ 1,
3631
+ 1,
3632
+ 1024,
3633
+ 512
3634
  ],
3635
  "filename_index": 0
3636
  },
3637
  {
3638
  "offsets": [
3639
+ 0,
3640
+ 0,
3641
+ 0,
3642
+ 512
3643
  ],
3644
  "shape": [
3645
+ 1,
3646
+ 1,
3647
+ 1024,
3648
+ 512
3649
  ],
3650
  "filename_index": 1
3651
  }
3652
  ]
3653
  },
3654
+ "h.8.mlp.c_proj.weight": {
3655
  "type": "Distributed",
3656
  "shape": [
3657
+ 3072,
3658
+ 768
3659
  ],
3660
  "dtype": "F32",
3661
  "chunks": [
 
3665
  0
3666
  ],
3667
  "shape": [
3668
+ 1536,
3669
+ 768
3670
  ],
3671
  "filename_index": 0
3672
  },
3673
  {
3674
  "offsets": [
3675
+ 1536,
3676
+ 0
3677
  ],
3678
  "shape": [
3679
+ 1536,
3680
+ 768
3681
  ],
3682
  "filename_index": 1
3683
  }
3684
  ]
3685
  },
3686
+ "h.0.ln_1.bias": {
3687
  "type": "Distributed",
3688
  "shape": [
3689
  768
 
3710
  }
3711
  ]
3712
  },
3713
+ "h.5.attn.c_attn.bias": {
3714
  "type": "Distributed",
3715
  "shape": [
3716
+ 2304
3717
  ],
3718
  "dtype": "F32",
3719
  "chunks": [
 
3722
  0
3723
  ],
3724
  "shape": [
3725
+ 1152
3726
  ],
3727
  "filename_index": 0
3728
  },
3729
  {
3730
  "offsets": [
3731
+ 1152
3732
  ],
3733
  "shape": [
3734
+ 1152
3735
  ],
3736
  "filename_index": 1
3737
  }
3738
  ]
3739
  },
3740
+ "h.11.ln_1.bias": {
3741
  "type": "Distributed",
3742
  "shape": [
3743
+ 768
 
 
 
3744
  ],
3745
  "dtype": "F32",
3746
  "chunks": [
3747
  {
3748
  "offsets": [
 
 
 
3749
  0
3750
  ],
3751
  "shape": [
3752
+ 384
 
 
 
3753
  ],
3754
  "filename_index": 0
3755
  },
3756
  {
3757
  "offsets": [
3758
+ 384
 
 
 
3759
  ],
3760
  "shape": [
3761
+ 384
 
 
 
3762
  ],
3763
  "filename_index": 1
3764
  }
3765
  ]
3766
  },
3767
+ "h.11.attn.c_proj.weight": {
3768
  "type": "Distributed",
3769
  "shape": [
3770
+ 768,
3771
  768
3772
  ],
3773
  "dtype": "F32",
3774
  "chunks": [
3775
  {
3776
  "offsets": [
3777
+ 0,
3778
  0
3779
  ],
3780
  "shape": [
3781
+ 384,
3782
+ 768
3783
  ],
3784
  "filename_index": 0
3785
  },
3786
  {
3787
  "offsets": [
3788
+ 384,
3789
+ 0
3790
  ],
3791
  "shape": [
3792
+ 384,
3793
+ 768
3794
  ],
3795
  "filename_index": 1
3796
  }
3797
  ]
3798
  },
3799
+ "h.10.attn.c_attn.weight": {
3800
  "type": "Distributed",
3801
  "shape": [
3802
+ 768,
3803
+ 2304
3804
  ],
3805
  "dtype": "F32",
3806
  "chunks": [
3807
  {
3808
  "offsets": [
3809
+ 0,
3810
  0
3811
  ],
3812
  "shape": [
3813
+ 768,
3814
+ 1152
3815
  ],
3816
  "filename_index": 0
3817
  },
3818
  {
3819
  "offsets": [
3820
+ 0,
3821
+ 1152
3822
  ],
3823
  "shape": [
3824
+ 768,
3825
+ 1152
3826
  ],
3827
  "filename_index": 1
3828
  }
3829
  ]
3830
  },
3831
+ "h.1.mlp.c_proj.bias": {
3832
  "type": "Distributed",
3833
  "shape": [
3834
+ 768
 
3835
  ],
3836
  "dtype": "F32",
3837
  "chunks": [
3838
  {
3839
  "offsets": [
 
3840
  0
3841
  ],
3842
  "shape": [
3843
+ 384
 
3844
  ],
3845
  "filename_index": 0
3846
  },
3847
  {
3848
  "offsets": [
3849
+ 384
 
3850
  ],
3851
  "shape": [
3852
+ 384
 
3853
  ],
3854
  "filename_index": 1
3855
  }
3856
  ]
3857
  },
3858
+ "h.3.ln_1.bias": {
3859
  "type": "Distributed",
3860
  "shape": [
3861
  768
 
3882
  }
3883
  ]
3884
  },
3885
+ "h.6.ln_2.bias": {
3886
  "type": "Distributed",
3887
  "shape": [
3888
  768
 
3909
  }
3910
  ]
3911
  },
3912
+ "h.2.ln_1.weight": {
3913
  "type": "Distributed",
3914
  "shape": [
3915
  768
 
3936
  }
3937
  ]
3938
  },
3939
+ "h.4.ln_1.weight": {
3940
  "type": "Distributed",
3941
  "shape": [
3942
  768
 
3963
  }
3964
  ]
3965
  },
3966
+ "h.10.ln_2.weight": {
3967
  "type": "Distributed",
3968
  "shape": [
3969
  768
 
4022
  }
4023
  ]
4024
  },
4025
+ "h.11.ln_2.weight": {
4026
  "type": "Distributed",
4027
  "shape": [
 
4028
  768
4029
  ],
4030
  "dtype": "F32",
4031
  "chunks": [
4032
  {
4033
  "offsets": [
 
4034
  0
4035
  ],
4036
  "shape": [
4037
+ 384
 
4038
  ],
4039
  "filename_index": 0
4040
  },
4041
  {
4042
  "offsets": [
4043
+ 384
 
4044
  ],
4045
  "shape": [
4046
+ 384
 
4047
  ],
4048
  "filename_index": 1
4049
  }
4050
  ]
4051
  },
4052
+ "h.8.attn.c_proj.weight": {
4053
  "type": "Distributed",
4054
  "shape": [
4055
+ 768,
4056
+ 768
4057
  ],
4058
  "dtype": "F32",
4059
  "chunks": [
4060
  {
4061
  "offsets": [
4062
+ 0,
4063
  0
4064
  ],
4065
  "shape": [
4066
+ 384,
4067
+ 768
4068
  ],
4069
  "filename_index": 0
4070
  },
4071
  {
4072
  "offsets": [
4073
+ 384,
4074
+ 0
4075
  ],
4076
  "shape": [
4077
+ 384,
4078
+ 768
4079
  ],
4080
  "filename_index": 1
4081
  }
4082
  ]
4083
  },
4084
+ "h.9.attn.bias": {
4085
  "type": "Distributed",
4086
  "shape": [
4087
+ 1,
4088
+ 1,
4089
+ 1024,
4090
+ 1024
4091
  ],
4092
  "dtype": "F32",
4093
  "chunks": [
4094
  {
4095
  "offsets": [
4096
+ 0,
4097
+ 0,
4098
+ 0,
4099
  0
4100
  ],
4101
  "shape": [
4102
+ 1,
4103
+ 1,
4104
+ 1024,
4105
+ 512
4106
  ],
4107
  "filename_index": 0
4108
  },
4109
  {
4110
  "offsets": [
4111
+ 0,
4112
+ 0,
4113
+ 0,
4114
+ 512
4115
  ],
4116
  "shape": [
4117
+ 1,
4118
+ 1,
4119
+ 1024,
4120
+ 512
4121
  ],
4122
  "filename_index": 1
4123
  }
4124
  ]
4125
  },
4126
+ "h.1.ln_2.weight": {
4127
  "type": "Distributed",
4128
  "shape": [
4129
  768
 
4150
  }
4151
  ]
4152
  },
4153
+ "h.4.ln_2.weight": {
4154
  "type": "Distributed",
4155
  "shape": [
4156
+ 768
4157
  ],
4158
  "dtype": "F32",
4159
  "chunks": [
 
4162
  0
4163
  ],
4164
  "shape": [
4165
+ 384
4166
  ],
4167
  "filename_index": 0
4168
  },
4169
  {
4170
  "offsets": [
4171
+ 384
4172
  ],
4173
  "shape": [
4174
+ 384
4175
  ],
4176
  "filename_index": 1
4177
  }
4178
  ]
4179
  },
4180
+ "h.3.attn.c_attn.weight": {
4181
  "type": "Distributed",
4182
  "shape": [
4183
+ 768,
4184
+ 2304
4185
  ],
4186
  "dtype": "F32",
4187
  "chunks": [
4188
  {
4189
  "offsets": [
4190
+ 0,
4191
  0
4192
  ],
4193
  "shape": [
4194
+ 768,
4195
+ 1152
4196
  ],
4197
  "filename_index": 0
4198
  },
4199
  {
4200
  "offsets": [
4201
+ 0,
4202
+ 1152
4203
  ],
4204
  "shape": [
4205
+ 768,
4206
+ 1152
4207
  ],
4208
  "filename_index": 1
4209
  }
4210
  ]
4211
  },
4212
+ "h.8.mlp.c_fc.bias": {
4213
  "type": "Distributed",
4214
  "shape": [
4215
  3072
 
4236
  }
4237
  ]
4238
  },
4239
+ "h.10.attn.c_proj.bias": {
4240
  "type": "Distributed",
4241
  "shape": [
4242
+ 768
 
4243
  ],
4244
  "dtype": "F32",
4245
  "chunks": [
4246
  {
4247
  "offsets": [
 
4248
  0
4249
  ],
4250
  "shape": [
4251
+ 384
 
4252
  ],
4253
  "filename_index": 0
4254
  },
4255
  {
4256
  "offsets": [
4257
+ 384
 
4258
  ],
4259
  "shape": [
4260
+ 384
 
4261
  ],
4262
  "filename_index": 1
4263
  }
4264
  ]
4265
  },
4266
+ "h.9.attn.c_attn.weight": {
4267
  "type": "Distributed",
4268
  "shape": [
4269
  768,
4270
+ 2304
4271
  ],
4272
  "dtype": "F32",
4273
  "chunks": [
 
4277
  0
4278
  ],
4279
  "shape": [
4280
+ 768,
4281
+ 1152
4282
  ],
4283
  "filename_index": 0
4284
  },
4285
  {
4286
  "offsets": [
4287
+ 0,
4288
+ 1152
4289
  ],
4290
  "shape": [
4291
+ 768,
4292
+ 1152
4293
  ],
4294
  "filename_index": 1
4295
  }
4296
  ]
4297
  },
4298
+ "h.3.ln_2.bias": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
  768
 
4322
  }
4323
  ]
4324
  },
4325
+ "h.6.attn.c_attn.bias": {
4326
  "type": "Distributed",
4327
  "shape": [
4328
+ 2304
 
4329
  ],
4330
  "dtype": "F32",
4331
  "chunks": [
4332
  {
4333
  "offsets": [
 
4334
  0
4335
  ],
4336
  "shape": [
4337
+ 1152
 
4338
  ],
4339
  "filename_index": 0
4340
  },
4341
  {
4342
  "offsets": [
4343
+ 1152
 
4344
  ],
4345
  "shape": [
4346
+ 1152
 
4347
  ],
4348
  "filename_index": 1
4349
  }
4350
  ]
4351
  },
4352
+ "h.2.attn.c_attn.bias": {
4353
  "type": "Distributed",
4354
  "shape": [
4355
  2304
 
4376
  }
4377
  ]
4378
  },
4379
+ "h.10.mlp.c_proj.weight": {
4380
  "type": "Distributed",
4381
  "shape": [
4382
+ 3072,
4383
  768
4384
  ],
4385
  "dtype": "F32",
4386
  "chunks": [
4387
  {
4388
  "offsets": [
4389
+ 0,
4390
  0
4391
  ],
4392
  "shape": [
4393
+ 1536,
4394
+ 768
4395
  ],
4396
  "filename_index": 0
4397
  },
4398
  {
4399
  "offsets": [
4400
+ 1536,
4401
+ 0
4402
  ],
4403
  "shape": [
4404
+ 1536,
4405
+ 768
4406
  ],
4407
  "filename_index": 1
4408
  }
4409
  ]
4410
  },
4411
+ "h.1.attn.c_attn.weight": {
4412
  "type": "Distributed",
4413
  "shape": [
4414
+ 768,
4415
  2304
4416
  ],
4417
  "dtype": "F32",
4418
  "chunks": [
4419
  {
4420
  "offsets": [
4421
+ 0,
4422
  0
4423
  ],
4424
  "shape": [
4425
+ 768,
4426
  1152
4427
  ],
4428
  "filename_index": 0
4429
  },
4430
  {
4431
  "offsets": [
4432
+ 0,
4433
  1152
4434
  ],
4435
  "shape": [
4436
+ 768,
4437
  1152
4438
  ],
4439
  "filename_index": 1
4440
  }
4441
  ]
4442
  },
4443
+ "h.4.attn.c_attn.weight": {
4444
  "type": "Distributed",
4445
  "shape": [
4446
+ 768,
4447
+ 2304
4448
  ],
4449
  "dtype": "F32",
4450
  "chunks": [
4451
  {
4452
  "offsets": [
4453
+ 0,
4454
  0
4455
  ],
4456
  "shape": [
4457
+ 768,
4458
+ 1152
4459
  ],
4460
  "filename_index": 0
4461
  },
4462
  {
4463
  "offsets": [
4464
+ 0,
4465
+ 1152
4466
  ],
4467
  "shape": [
4468
+ 768,
4469
+ 1152
4470
  ],
4471
  "filename_index": 1
4472
  }
4473
  ]
4474
  },
4475
+ "h.6.attn.c_proj.bias": {
4476
  "type": "Distributed",
4477
  "shape": [
4478
  768
 
4499
  }
4500
  ]
4501
  },
4502
+ "h.0.attn.c_proj.weight": {
4503
  "type": "Distributed",
4504
  "shape": [
4505
+ 768,
4506
+ 768
 
 
4507
  ],
4508
  "dtype": "F32",
4509
  "chunks": [
4510
  {
4511
  "offsets": [
 
 
4512
  0,
4513
  0
4514
  ],
4515
  "shape": [
4516
+ 384,
4517
+ 768
 
 
4518
  ],
4519
  "filename_index": 0
4520
  },
4521
  {
4522
  "offsets": [
4523
+ 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4524
  0
4525
  ],
4526
  "shape": [
4527
+ 384,
4528
+ 768
 
 
 
 
 
 
 
 
4529
  ],
4530
  "filename_index": 1
4531
  }
4532
  ]
4533
  },
4534
+ "h.11.mlp.c_fc.bias": {
4535
  "type": "Distributed",
4536
  "shape": [
4537
+ 3072
4538
  ],
4539
  "dtype": "F32",
4540
  "chunks": [
 
4543
  0
4544
  ],
4545
  "shape": [
4546
+ 1536
4547
  ],
4548
  "filename_index": 0
4549
  },
4550
  {
4551
  "offsets": [
4552
+ 1536
4553
  ],
4554
  "shape": [
4555
+ 1536
4556
  ],
4557
  "filename_index": 1
4558
  }
4559
  ]
4560
  },
4561
+ "h.0.attn.c_attn.weight": {
4562
  "type": "Distributed",
4563
  "shape": [
4564
  768,
 
4590
  }
4591
  ]
4592
  },
4593
+ "h.3.attn.bias": {
4594
  "type": "Distributed",
4595
  "shape": [
4596
  1,
 
4632
  }
4633
  ]
4634
  },
4635
+ "h.4.attn.c_proj.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4636
  "type": "Distributed",
4637
  "shape": [
4638
  768,
4639
+ 768
4640
  ],
4641
  "dtype": "F32",
4642
  "chunks": [
 
4646
  0
4647
  ],
4648
  "shape": [
4649
+ 384,
4650
+ 768
4651
  ],
4652
  "filename_index": 0
4653
  },
4654
  {
4655
  "offsets": [
4656
+ 384,
4657
+ 0
4658
  ],
4659
  "shape": [
4660
+ 384,
4661
+ 768
4662
  ],
4663
  "filename_index": 1
4664
  }
4665
  ]
4666
  },
4667
+ "h.8.ln_2.weight": {
4668
  "type": "Distributed",
4669
  "shape": [
4670
  768
 
4691
  }
4692
  ]
4693
  },
4694
+ "h.0.attn.c_proj.bias": {
4695
  "type": "Distributed",
4696
  "shape": [
4697
  768
 
4718
  }
4719
  ]
4720
  },
4721
+ "h.4.mlp.c_proj.weight": {
4722
  "type": "Distributed",
4723
  "shape": [
4724
+ 3072,
4725
+ 768
4726
  ],
4727
  "dtype": "F32",
4728
  "chunks": [
4729
  {
4730
  "offsets": [
4731
+ 0,
4732
  0
4733
  ],
4734
  "shape": [
4735
+ 1536,
4736
+ 768
4737
  ],
4738
  "filename_index": 0
4739
  },
4740
  {
4741
  "offsets": [
4742
+ 1536,
4743
+ 0
4744
  ],
4745
  "shape": [
4746
+ 1536,
4747
+ 768
4748
  ],
4749
  "filename_index": 1
4750
  }