Narsil HF Staff commited on
Commit
492bdfd
·
verified ·
1 Parent(s): 66c7fdc

Upload topology.json

Browse files
Files changed (1) hide show
  1. topology.json +867 -867
topology.json CHANGED
@@ -1,33 +1,38 @@
1
  {
2
  "tensors": {
3
- "h.3.mlp.c_proj.bias": {
4
  "type": "Distributed",
5
  "shape": [
 
6
  768
7
  ],
8
  "dtype": "F32",
9
  "chunks": [
10
  {
11
  "offsets": [
 
12
  0
13
  ],
14
  "shape": [
15
- 384
 
16
  ],
17
  "filename_index": 0
18
  },
19
  {
20
  "offsets": [
21
- 384
 
22
  ],
23
  "shape": [
24
- 384
 
25
  ],
26
  "filename_index": 1
27
  }
28
  ]
29
  },
30
- "h.3.ln_1.weight": {
31
  "type": "Distributed",
32
  "shape": [
33
  768
@@ -54,11 +59,11 @@
54
  }
55
  ]
56
  },
57
- "h.4.attn.c_proj.weight": {
58
  "type": "Distributed",
59
  "shape": [
60
  768,
61
- 768
62
  ],
63
  "dtype": "F32",
64
  "chunks": [
@@ -68,28 +73,28 @@
68
  0
69
  ],
70
  "shape": [
71
- 384,
72
- 768
73
  ],
74
  "filename_index": 0
75
  },
76
  {
77
  "offsets": [
78
- 384,
79
- 0
80
  ],
81
  "shape": [
82
- 384,
83
- 768
84
  ],
85
  "filename_index": 1
86
  }
87
  ]
88
  },
89
- "h.1.attn.c_proj.bias": {
90
  "type": "Distributed",
91
  "shape": [
92
- 768
93
  ],
94
  "dtype": "F32",
95
  "chunks": [
@@ -98,67 +103,52 @@
98
  0
99
  ],
100
  "shape": [
101
- 384
102
  ],
103
  "filename_index": 0
104
  },
105
  {
106
  "offsets": [
107
- 384
108
  ],
109
  "shape": [
110
- 384
111
  ],
112
  "filename_index": 1
113
  }
114
  ]
115
  },
116
- "h.6.attn.bias": {
117
  "type": "Distributed",
118
  "shape": [
119
- 1,
120
- 1,
121
- 1024,
122
- 1024
123
  ],
124
  "dtype": "F32",
125
  "chunks": [
126
  {
127
  "offsets": [
128
- 0,
129
- 0,
130
- 0,
131
  0
132
  ],
133
  "shape": [
134
- 1,
135
- 1,
136
- 1024,
137
- 512
138
  ],
139
  "filename_index": 0
140
  },
141
  {
142
  "offsets": [
143
- 0,
144
- 0,
145
- 0,
146
- 512
147
  ],
148
  "shape": [
149
- 1,
150
- 1,
151
- 1024,
152
- 512
153
  ],
154
  "filename_index": 1
155
  }
156
  ]
157
  },
158
- "h.5.ln_1.bias": {
159
  "type": "Distributed",
160
  "shape": [
161
- 768
162
  ],
163
  "dtype": "F32",
164
  "chunks": [
@@ -167,22 +157,22 @@
167
  0
168
  ],
169
  "shape": [
170
- 384
171
  ],
172
  "filename_index": 0
173
  },
174
  {
175
  "offsets": [
176
- 384
177
  ],
178
  "shape": [
179
- 384
180
  ],
181
  "filename_index": 1
182
  }
183
  ]
184
  },
185
- "h.2.mlp.c_proj.bias": {
186
  "type": "Distributed",
187
  "shape": [
188
  768
@@ -209,10 +199,10 @@
209
  }
210
  ]
211
  },
212
- "h.0.attn.c_attn.bias": {
213
  "type": "Distributed",
214
  "shape": [
215
- 2304
216
  ],
217
  "dtype": "F32",
218
  "chunks": [
@@ -221,22 +211,22 @@
221
  0
222
  ],
223
  "shape": [
224
- 1152
225
  ],
226
  "filename_index": 0
227
  },
228
  {
229
  "offsets": [
230
- 1152
231
  ],
232
  "shape": [
233
- 1152
234
  ],
235
  "filename_index": 1
236
  }
237
  ]
238
  },
239
- "h.11.ln_1.bias": {
240
  "type": "Distributed",
241
  "shape": [
242
  768
@@ -263,64 +253,84 @@
263
  }
264
  ]
265
  },
266
- "h.4.attn.c_attn.bias": {
267
  "type": "Distributed",
268
  "shape": [
269
- 2304
 
 
 
270
  ],
271
  "dtype": "F32",
272
  "chunks": [
273
  {
274
  "offsets": [
 
 
 
275
  0
276
  ],
277
  "shape": [
278
- 1152
 
 
 
279
  ],
280
  "filename_index": 0
281
  },
282
  {
283
  "offsets": [
284
- 1152
 
 
 
285
  ],
286
  "shape": [
287
- 1152
 
 
 
288
  ],
289
  "filename_index": 1
290
  }
291
  ]
292
  },
293
- "h.6.mlp.c_proj.bias": {
294
  "type": "Distributed",
295
  "shape": [
296
- 768
 
297
  ],
298
  "dtype": "F32",
299
  "chunks": [
300
  {
301
  "offsets": [
 
302
  0
303
  ],
304
  "shape": [
305
- 384
 
306
  ],
307
  "filename_index": 0
308
  },
309
  {
310
  "offsets": [
311
- 384
 
312
  ],
313
  "shape": [
314
- 384
 
315
  ],
316
  "filename_index": 1
317
  }
318
  ]
319
  },
320
- "h.9.mlp.c_fc.bias": {
321
  "type": "Distributed",
322
  "shape": [
323
- 3072
324
  ],
325
  "dtype": "F32",
326
  "chunks": [
@@ -329,86 +339,76 @@
329
  0
330
  ],
331
  "shape": [
332
- 1536
333
  ],
334
  "filename_index": 0
335
  },
336
  {
337
  "offsets": [
338
- 1536
339
  ],
340
  "shape": [
341
- 1536
342
  ],
343
  "filename_index": 1
344
  }
345
  ]
346
  },
347
- "h.3.mlp.c_proj.weight": {
348
  "type": "Distributed",
349
  "shape": [
350
- 3072,
351
  768
352
  ],
353
  "dtype": "F32",
354
  "chunks": [
355
  {
356
  "offsets": [
357
- 0,
358
  0
359
  ],
360
  "shape": [
361
- 1536,
362
- 768
363
  ],
364
  "filename_index": 0
365
  },
366
  {
367
  "offsets": [
368
- 1536,
369
- 0
370
  ],
371
  "shape": [
372
- 1536,
373
- 768
374
  ],
375
  "filename_index": 1
376
  }
377
  ]
378
  },
379
- "h.7.attn.c_attn.weight": {
380
  "type": "Distributed",
381
  "shape": [
382
- 768,
383
- 2304
384
  ],
385
  "dtype": "F32",
386
  "chunks": [
387
  {
388
  "offsets": [
389
- 0,
390
  0
391
  ],
392
  "shape": [
393
- 768,
394
- 1152
395
  ],
396
  "filename_index": 0
397
  },
398
  {
399
  "offsets": [
400
- 0,
401
- 1152
402
  ],
403
  "shape": [
404
- 768,
405
- 1152
406
  ],
407
  "filename_index": 1
408
  }
409
  ]
410
  },
411
- "h.9.ln_2.weight": {
412
  "type": "Distributed",
413
  "shape": [
414
  768
@@ -435,64 +435,74 @@
435
  }
436
  ]
437
  },
438
- "h.3.ln_2.weight": {
439
  "type": "Distributed",
440
  "shape": [
441
- 768
 
442
  ],
443
  "dtype": "F32",
444
  "chunks": [
445
  {
446
  "offsets": [
 
447
  0
448
  ],
449
  "shape": [
450
- 384
 
451
  ],
452
  "filename_index": 0
453
  },
454
  {
455
  "offsets": [
456
- 384
 
457
  ],
458
  "shape": [
459
- 384
 
460
  ],
461
  "filename_index": 1
462
  }
463
  ]
464
  },
465
- "h.5.mlp.c_fc.bias": {
466
  "type": "Distributed",
467
  "shape": [
 
468
  3072
469
  ],
470
  "dtype": "F32",
471
  "chunks": [
472
  {
473
  "offsets": [
 
474
  0
475
  ],
476
  "shape": [
 
477
  1536
478
  ],
479
  "filename_index": 0
480
  },
481
  {
482
  "offsets": [
 
483
  1536
484
  ],
485
  "shape": [
 
486
  1536
487
  ],
488
  "filename_index": 1
489
  }
490
  ]
491
  },
492
- "h.6.ln_2.bias": {
493
  "type": "Distributed",
494
  "shape": [
495
- 768
496
  ],
497
  "dtype": "F32",
498
  "chunks": [
@@ -501,25 +511,25 @@
501
  0
502
  ],
503
  "shape": [
504
- 384
505
  ],
506
  "filename_index": 0
507
  },
508
  {
509
  "offsets": [
510
- 384
511
  ],
512
  "shape": [
513
- 384
514
  ],
515
  "filename_index": 1
516
  }
517
  ]
518
  },
519
- "h.4.ln_1.weight": {
520
  "type": "Distributed",
521
  "shape": [
522
- 768
523
  ],
524
  "dtype": "F32",
525
  "chunks": [
@@ -528,54 +538,49 @@
528
  0
529
  ],
530
  "shape": [
531
- 384
532
  ],
533
  "filename_index": 0
534
  },
535
  {
536
  "offsets": [
537
- 384
538
  ],
539
  "shape": [
540
- 384
541
  ],
542
  "filename_index": 1
543
  }
544
  ]
545
  },
546
- "h.3.attn.c_proj.weight": {
547
  "type": "Distributed",
548
  "shape": [
549
- 768,
550
  768
551
  ],
552
  "dtype": "F32",
553
  "chunks": [
554
  {
555
  "offsets": [
556
- 0,
557
  0
558
  ],
559
  "shape": [
560
- 384,
561
- 768
562
  ],
563
  "filename_index": 0
564
  },
565
  {
566
  "offsets": [
567
- 384,
568
- 0
569
  ],
570
  "shape": [
571
- 384,
572
- 768
573
  ],
574
  "filename_index": 1
575
  }
576
  ]
577
  },
578
- "h.1.ln_1.weight": {
579
  "type": "Distributed",
580
  "shape": [
581
  768
@@ -602,7 +607,7 @@
602
  }
603
  ]
604
  },
605
- "h.6.ln_1.bias": {
606
  "type": "Distributed",
607
  "shape": [
608
  768
@@ -629,7 +634,7 @@
629
  }
630
  ]
631
  },
632
- "h.3.attn.c_attn.bias": {
633
  "type": "Distributed",
634
  "shape": [
635
  2304
@@ -656,34 +661,49 @@
656
  }
657
  ]
658
  },
659
- "ln_f.weight": {
660
  "type": "Distributed",
661
  "shape": [
662
- 768
 
 
 
663
  ],
664
  "dtype": "F32",
665
  "chunks": [
666
  {
667
  "offsets": [
 
 
 
668
  0
669
  ],
670
  "shape": [
671
- 384
 
 
 
672
  ],
673
  "filename_index": 0
674
  },
675
  {
676
  "offsets": [
677
- 384
 
 
 
678
  ],
679
  "shape": [
680
- 384
 
 
 
681
  ],
682
  "filename_index": 1
683
  }
684
  ]
685
  },
686
- "h.10.mlp.c_fc.bias": {
687
  "type": "Distributed",
688
  "shape": [
689
  3072
@@ -710,10 +730,10 @@
710
  }
711
  ]
712
  },
713
- "h.6.mlp.c_fc.bias": {
714
  "type": "Distributed",
715
  "shape": [
716
- 3072
717
  ],
718
  "dtype": "F32",
719
  "chunks": [
@@ -722,126 +742,116 @@
722
  0
723
  ],
724
  "shape": [
725
- 1536
726
  ],
727
  "filename_index": 0
728
  },
729
  {
730
  "offsets": [
731
- 1536
732
  ],
733
  "shape": [
734
- 1536
735
  ],
736
  "filename_index": 1
737
  }
738
  ]
739
  },
740
- "h.11.mlp.c_fc.bias": {
741
  "type": "Distributed",
742
  "shape": [
743
- 3072
 
744
  ],
745
  "dtype": "F32",
746
  "chunks": [
747
  {
748
  "offsets": [
 
749
  0
750
  ],
751
  "shape": [
752
- 1536
 
753
  ],
754
  "filename_index": 0
755
  },
756
  {
757
  "offsets": [
758
- 1536
 
759
  ],
760
  "shape": [
761
- 1536
 
762
  ],
763
  "filename_index": 1
764
  }
765
  ]
766
  },
767
- "h.5.attn.c_attn.weight": {
768
  "type": "Distributed",
769
  "shape": [
770
- 768,
771
  2304
772
  ],
773
  "dtype": "F32",
774
  "chunks": [
775
  {
776
  "offsets": [
777
- 0,
778
  0
779
  ],
780
  "shape": [
781
- 768,
782
  1152
783
  ],
784
  "filename_index": 0
785
  },
786
  {
787
  "offsets": [
788
- 0,
789
  1152
790
  ],
791
  "shape": [
792
- 768,
793
  1152
794
  ],
795
  "filename_index": 1
796
  }
797
  ]
798
  },
799
- "h.1.attn.bias": {
800
  "type": "Distributed",
801
  "shape": [
802
- 1,
803
- 1,
804
- 1024,
805
- 1024
806
  ],
807
  "dtype": "F32",
808
  "chunks": [
809
  {
810
  "offsets": [
811
- 0,
812
- 0,
813
  0,
814
  0
815
  ],
816
  "shape": [
817
- 1,
818
- 1,
819
- 1024,
820
- 512
821
  ],
822
  "filename_index": 0
823
  },
824
  {
825
  "offsets": [
826
- 0,
827
- 0,
828
- 0,
829
- 512
830
  ],
831
  "shape": [
832
- 1,
833
- 1,
834
- 1024,
835
- 512
836
  ],
837
  "filename_index": 1
838
  }
839
  ]
840
  },
841
- "h.3.attn.c_proj.bias": {
842
  "type": "Distributed",
843
  "shape": [
844
- 768
845
  ],
846
  "dtype": "F32",
847
  "chunks": [
@@ -850,64 +860,54 @@
850
  0
851
  ],
852
  "shape": [
853
- 384
854
  ],
855
  "filename_index": 0
856
  },
857
  {
858
  "offsets": [
859
- 384
860
  ],
861
  "shape": [
862
- 384
863
  ],
864
  "filename_index": 1
865
  }
866
  ]
867
  },
868
- "h.9.attn.bias": {
869
  "type": "Distributed",
870
  "shape": [
871
- 1,
872
- 1,
873
- 1024,
874
- 1024
875
  ],
876
  "dtype": "F32",
877
  "chunks": [
878
  {
879
  "offsets": [
880
- 0,
881
- 0,
882
  0,
883
  0
884
  ],
885
  "shape": [
886
- 1,
887
- 1,
888
- 1024,
889
- 512
890
  ],
891
  "filename_index": 0
892
  },
893
  {
894
  "offsets": [
895
  0,
896
- 0,
897
- 0,
898
- 512
899
  ],
900
  "shape": [
901
- 1,
902
- 1,
903
- 1024,
904
- 512
905
  ],
906
  "filename_index": 1
907
  }
908
  ]
909
  },
910
- "h.4.ln_2.bias": {
911
  "type": "Distributed",
912
  "shape": [
913
  768
@@ -934,7 +934,7 @@
934
  }
935
  ]
936
  },
937
- "h.5.mlp.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
  768
@@ -961,39 +961,34 @@
961
  }
962
  ]
963
  },
964
- "h.7.mlp.c_proj.weight": {
965
  "type": "Distributed",
966
  "shape": [
967
- 3072,
968
  768
969
  ],
970
  "dtype": "F32",
971
  "chunks": [
972
  {
973
  "offsets": [
974
- 0,
975
  0
976
  ],
977
  "shape": [
978
- 1536,
979
- 768
980
  ],
981
  "filename_index": 0
982
  },
983
  {
984
  "offsets": [
985
- 1536,
986
- 0
987
  ],
988
  "shape": [
989
- 1536,
990
- 768
991
  ],
992
  "filename_index": 1
993
  }
994
  ]
995
  },
996
- "h.5.attn.c_proj.bias": {
997
  "type": "Distributed",
998
  "shape": [
999
  768
@@ -1020,43 +1015,38 @@
1020
  }
1021
  ]
1022
  },
1023
- "h.0.attn.c_proj.weight": {
1024
  "type": "Distributed",
1025
  "shape": [
1026
- 768,
1027
  768
1028
  ],
1029
  "dtype": "F32",
1030
  "chunks": [
1031
  {
1032
  "offsets": [
1033
- 0,
1034
  0
1035
  ],
1036
  "shape": [
1037
- 384,
1038
- 768
1039
  ],
1040
  "filename_index": 0
1041
  },
1042
  {
1043
  "offsets": [
1044
- 384,
1045
- 0
1046
  ],
1047
  "shape": [
1048
- 384,
1049
- 768
1050
  ],
1051
  "filename_index": 1
1052
  }
1053
  ]
1054
  },
1055
- "h.9.mlp.c_fc.weight": {
1056
  "type": "Distributed",
1057
  "shape": [
1058
- 768,
1059
- 3072
1060
  ],
1061
  "dtype": "F32",
1062
  "chunks": [
@@ -1066,25 +1056,25 @@
1066
  0
1067
  ],
1068
  "shape": [
1069
- 768,
1070
- 1536
1071
  ],
1072
  "filename_index": 0
1073
  },
1074
  {
1075
  "offsets": [
1076
- 0,
1077
- 1536
1078
  ],
1079
  "shape": [
1080
- 768,
1081
- 1536
1082
  ],
1083
  "filename_index": 1
1084
  }
1085
  ]
1086
  },
1087
- "h.9.ln_1.bias": {
1088
  "type": "Distributed",
1089
  "shape": [
1090
  768
@@ -1111,42 +1101,37 @@
1111
  }
1112
  ]
1113
  },
1114
- "h.6.attn.c_attn.weight": {
1115
  "type": "Distributed",
1116
  "shape": [
1117
- 768,
1118
- 2304
1119
  ],
1120
  "dtype": "F32",
1121
  "chunks": [
1122
  {
1123
  "offsets": [
1124
- 0,
1125
  0
1126
  ],
1127
  "shape": [
1128
- 768,
1129
- 1152
1130
  ],
1131
  "filename_index": 0
1132
  },
1133
  {
1134
  "offsets": [
1135
- 0,
1136
- 1152
1137
  ],
1138
  "shape": [
1139
- 768,
1140
- 1152
1141
  ],
1142
  "filename_index": 1
1143
  }
1144
  ]
1145
  },
1146
- "h.11.mlp.c_proj.bias": {
1147
  "type": "Distributed",
1148
  "shape": [
1149
- 768
1150
  ],
1151
  "dtype": "F32",
1152
  "chunks": [
@@ -1155,52 +1140,57 @@
1155
  0
1156
  ],
1157
  "shape": [
1158
- 384
1159
  ],
1160
  "filename_index": 0
1161
  },
1162
  {
1163
  "offsets": [
1164
- 384
1165
  ],
1166
  "shape": [
1167
- 384
1168
  ],
1169
  "filename_index": 1
1170
  }
1171
  ]
1172
  },
1173
- "h.2.ln_1.weight": {
1174
  "type": "Distributed",
1175
  "shape": [
1176
- 768
 
1177
  ],
1178
  "dtype": "F32",
1179
  "chunks": [
1180
  {
1181
  "offsets": [
 
1182
  0
1183
  ],
1184
  "shape": [
1185
- 384
 
1186
  ],
1187
  "filename_index": 0
1188
  },
1189
  {
1190
  "offsets": [
1191
- 384
 
1192
  ],
1193
  "shape": [
1194
- 384
 
1195
  ],
1196
  "filename_index": 1
1197
  }
1198
  ]
1199
  },
1200
- "h.7.attn.c_attn.bias": {
1201
  "type": "Distributed",
1202
  "shape": [
1203
- 2304
1204
  ],
1205
  "dtype": "F32",
1206
  "chunks": [
@@ -1209,22 +1199,22 @@
1209
  0
1210
  ],
1211
  "shape": [
1212
- 1152
1213
  ],
1214
  "filename_index": 0
1215
  },
1216
  {
1217
  "offsets": [
1218
- 1152
1219
  ],
1220
  "shape": [
1221
- 1152
1222
  ],
1223
  "filename_index": 1
1224
  }
1225
  ]
1226
  },
1227
- "h.4.ln_1.bias": {
1228
  "type": "Distributed",
1229
  "shape": [
1230
  768
@@ -1251,7 +1241,7 @@
1251
  }
1252
  ]
1253
  },
1254
- "h.9.mlp.c_proj.bias": {
1255
  "type": "Distributed",
1256
  "shape": [
1257
  768
@@ -1278,43 +1268,38 @@
1278
  }
1279
  ]
1280
  },
1281
- "h.4.mlp.c_proj.weight": {
1282
  "type": "Distributed",
1283
  "shape": [
1284
- 3072,
1285
  768
1286
  ],
1287
  "dtype": "F32",
1288
  "chunks": [
1289
  {
1290
  "offsets": [
1291
- 0,
1292
  0
1293
  ],
1294
  "shape": [
1295
- 1536,
1296
- 768
1297
  ],
1298
  "filename_index": 0
1299
  },
1300
  {
1301
  "offsets": [
1302
- 1536,
1303
- 0
1304
  ],
1305
  "shape": [
1306
- 1536,
1307
- 768
1308
  ],
1309
  "filename_index": 1
1310
  }
1311
  ]
1312
  },
1313
- "h.3.attn.c_attn.weight": {
1314
  "type": "Distributed",
1315
  "shape": [
1316
- 768,
1317
- 2304
1318
  ],
1319
  "dtype": "F32",
1320
  "chunks": [
@@ -1324,25 +1309,25 @@
1324
  0
1325
  ],
1326
  "shape": [
1327
- 768,
1328
- 1152
1329
  ],
1330
  "filename_index": 0
1331
  },
1332
  {
1333
  "offsets": [
1334
  0,
1335
- 1152
1336
  ],
1337
  "shape": [
1338
- 768,
1339
- 1152
1340
  ],
1341
  "filename_index": 1
1342
  }
1343
  ]
1344
  },
1345
- "h.2.ln_2.bias": {
1346
  "type": "Distributed",
1347
  "shape": [
1348
  768
@@ -1401,10 +1386,10 @@
1401
  }
1402
  ]
1403
  },
1404
- "h.8.mlp.c_fc.bias": {
1405
  "type": "Distributed",
1406
  "shape": [
1407
- 3072
1408
  ],
1409
  "dtype": "F32",
1410
  "chunks": [
@@ -1413,25 +1398,25 @@
1413
  0
1414
  ],
1415
  "shape": [
1416
- 1536
1417
  ],
1418
  "filename_index": 0
1419
  },
1420
  {
1421
  "offsets": [
1422
- 1536
1423
  ],
1424
  "shape": [
1425
- 1536
1426
  ],
1427
  "filename_index": 1
1428
  }
1429
  ]
1430
  },
1431
- "h.2.mlp.c_fc.bias": {
1432
  "type": "Distributed",
1433
  "shape": [
1434
- 3072
1435
  ],
1436
  "dtype": "F32",
1437
  "chunks": [
@@ -1440,22 +1425,22 @@
1440
  0
1441
  ],
1442
  "shape": [
1443
- 1536
1444
  ],
1445
  "filename_index": 0
1446
  },
1447
  {
1448
  "offsets": [
1449
- 1536
1450
  ],
1451
  "shape": [
1452
- 1536
1453
  ],
1454
  "filename_index": 1
1455
  }
1456
  ]
1457
  },
1458
- "h.11.attn.c_proj.bias": {
1459
  "type": "Distributed",
1460
  "shape": [
1461
  768
@@ -1482,11 +1467,11 @@
1482
  }
1483
  ]
1484
  },
1485
- "h.11.attn.c_attn.weight": {
1486
  "type": "Distributed",
1487
  "shape": [
1488
- 768,
1489
- 2304
1490
  ],
1491
  "dtype": "F32",
1492
  "chunks": [
@@ -1496,29 +1481,29 @@
1496
  0
1497
  ],
1498
  "shape": [
1499
- 768,
1500
- 1152
1501
  ],
1502
  "filename_index": 0
1503
  },
1504
  {
1505
  "offsets": [
1506
- 0,
1507
- 1152
1508
  ],
1509
  "shape": [
1510
- 768,
1511
- 1152
1512
  ],
1513
  "filename_index": 1
1514
  }
1515
  ]
1516
  },
1517
- "h.0.attn.c_attn.weight": {
1518
  "type": "Distributed",
1519
  "shape": [
1520
  768,
1521
- 2304
1522
  ],
1523
  "dtype": "F32",
1524
  "chunks": [
@@ -1528,25 +1513,25 @@
1528
  0
1529
  ],
1530
  "shape": [
1531
- 768,
1532
- 1152
1533
  ],
1534
  "filename_index": 0
1535
  },
1536
  {
1537
  "offsets": [
1538
- 0,
1539
- 1152
1540
  ],
1541
  "shape": [
1542
- 768,
1543
- 1152
1544
  ],
1545
  "filename_index": 1
1546
  }
1547
  ]
1548
  },
1549
- "h.2.attn.bias": {
1550
  "type": "Distributed",
1551
  "shape": [
1552
  1,
@@ -1588,10 +1573,10 @@
1588
  }
1589
  ]
1590
  },
1591
- "wte.weight": {
1592
  "type": "Distributed",
1593
  "shape": [
1594
- 50257,
1595
  768
1596
  ],
1597
  "dtype": "F32",
@@ -1602,25 +1587,25 @@
1602
  0
1603
  ],
1604
  "shape": [
1605
- 50257,
1606
- 384
1607
  ],
1608
  "filename_index": 0
1609
  },
1610
  {
1611
  "offsets": [
1612
- 0,
1613
- 384
1614
  ],
1615
  "shape": [
1616
- 50257,
1617
- 384
1618
  ],
1619
  "filename_index": 1
1620
  }
1621
  ]
1622
  },
1623
- "h.11.ln_2.weight": {
1624
  "type": "Distributed",
1625
  "shape": [
1626
  768
@@ -1647,34 +1632,39 @@
1647
  }
1648
  ]
1649
  },
1650
- "h.6.attn.c_attn.bias": {
1651
  "type": "Distributed",
1652
  "shape": [
1653
- 2304
 
1654
  ],
1655
  "dtype": "F32",
1656
  "chunks": [
1657
  {
1658
  "offsets": [
 
1659
  0
1660
  ],
1661
  "shape": [
1662
- 1152
 
1663
  ],
1664
  "filename_index": 0
1665
  },
1666
  {
1667
  "offsets": [
1668
- 1152
 
1669
  ],
1670
  "shape": [
1671
- 1152
 
1672
  ],
1673
  "filename_index": 1
1674
  }
1675
  ]
1676
  },
1677
- "h.1.ln_2.weight": {
1678
  "type": "Distributed",
1679
  "shape": [
1680
  768
@@ -1701,7 +1691,7 @@
1701
  }
1702
  ]
1703
  },
1704
- "ln_f.bias": {
1705
  "type": "Distributed",
1706
  "shape": [
1707
  768
@@ -1728,10 +1718,10 @@
1728
  }
1729
  ]
1730
  },
1731
- "h.10.attn.c_attn.bias": {
1732
  "type": "Distributed",
1733
  "shape": [
1734
- 2304
1735
  ],
1736
  "dtype": "F32",
1737
  "chunks": [
@@ -1740,22 +1730,22 @@
1740
  0
1741
  ],
1742
  "shape": [
1743
- 1152
1744
  ],
1745
  "filename_index": 0
1746
  },
1747
  {
1748
  "offsets": [
1749
- 1152
1750
  ],
1751
  "shape": [
1752
- 1152
1753
  ],
1754
  "filename_index": 1
1755
  }
1756
  ]
1757
  },
1758
- "h.0.ln_1.bias": {
1759
  "type": "Distributed",
1760
  "shape": [
1761
  768
@@ -1782,10 +1772,10 @@
1782
  }
1783
  ]
1784
  },
1785
- "h.10.mlp.c_proj.bias": {
1786
  "type": "Distributed",
1787
  "shape": [
1788
- 768
1789
  ],
1790
  "dtype": "F32",
1791
  "chunks": [
@@ -1794,54 +1784,49 @@
1794
  0
1795
  ],
1796
  "shape": [
1797
- 384
1798
  ],
1799
  "filename_index": 0
1800
  },
1801
  {
1802
  "offsets": [
1803
- 384
1804
  ],
1805
  "shape": [
1806
- 384
1807
  ],
1808
  "filename_index": 1
1809
  }
1810
  ]
1811
  },
1812
- "h.8.attn.c_proj.weight": {
1813
  "type": "Distributed",
1814
  "shape": [
1815
- 768,
1816
  768
1817
  ],
1818
  "dtype": "F32",
1819
  "chunks": [
1820
  {
1821
  "offsets": [
1822
- 0,
1823
  0
1824
  ],
1825
  "shape": [
1826
- 384,
1827
- 768
1828
  ],
1829
  "filename_index": 0
1830
  },
1831
  {
1832
  "offsets": [
1833
- 384,
1834
- 0
1835
  ],
1836
  "shape": [
1837
- 384,
1838
- 768
1839
  ],
1840
  "filename_index": 1
1841
  }
1842
  ]
1843
  },
1844
- "h.10.ln_2.weight": {
1845
  "type": "Distributed",
1846
  "shape": [
1847
  768
@@ -1868,11 +1853,11 @@
1868
  }
1869
  ]
1870
  },
1871
- "h.9.attn.c_proj.weight": {
1872
  "type": "Distributed",
1873
  "shape": [
1874
  768,
1875
- 768
1876
  ],
1877
  "dtype": "F32",
1878
  "chunks": [
@@ -1882,25 +1867,25 @@
1882
  0
1883
  ],
1884
  "shape": [
1885
- 384,
1886
- 768
1887
  ],
1888
  "filename_index": 0
1889
  },
1890
  {
1891
  "offsets": [
1892
- 384,
1893
- 0
1894
  ],
1895
  "shape": [
1896
- 384,
1897
- 768
1898
  ],
1899
  "filename_index": 1
1900
  }
1901
  ]
1902
  },
1903
- "h.0.ln_2.bias": {
1904
  "type": "Distributed",
1905
  "shape": [
1906
  768
@@ -1927,64 +1912,74 @@
1927
  }
1928
  ]
1929
  },
1930
- "h.10.ln_2.bias": {
1931
  "type": "Distributed",
1932
  "shape": [
 
1933
  768
1934
  ],
1935
  "dtype": "F32",
1936
  "chunks": [
1937
  {
1938
  "offsets": [
 
1939
  0
1940
  ],
1941
  "shape": [
1942
- 384
 
1943
  ],
1944
  "filename_index": 0
1945
  },
1946
  {
1947
  "offsets": [
1948
- 384
 
1949
  ],
1950
  "shape": [
1951
- 384
 
1952
  ],
1953
  "filename_index": 1
1954
  }
1955
  ]
1956
  },
1957
- "h.1.mlp.c_proj.bias": {
1958
  "type": "Distributed",
1959
  "shape": [
1960
- 768
 
1961
  ],
1962
  "dtype": "F32",
1963
  "chunks": [
1964
  {
1965
  "offsets": [
 
1966
  0
1967
  ],
1968
  "shape": [
1969
- 384
 
1970
  ],
1971
  "filename_index": 0
1972
  },
1973
  {
1974
  "offsets": [
1975
- 384
 
1976
  ],
1977
  "shape": [
1978
- 384
 
1979
  ],
1980
  "filename_index": 1
1981
  }
1982
  ]
1983
  },
1984
- "h.7.ln_2.bias": {
1985
  "type": "Distributed",
1986
  "shape": [
1987
- 768
1988
  ],
1989
  "dtype": "F32",
1990
  "chunks": [
@@ -1993,22 +1988,22 @@
1993
  0
1994
  ],
1995
  "shape": [
1996
- 384
1997
  ],
1998
  "filename_index": 0
1999
  },
2000
  {
2001
  "offsets": [
2002
- 384
2003
  ],
2004
  "shape": [
2005
- 384
2006
  ],
2007
  "filename_index": 1
2008
  }
2009
  ]
2010
  },
2011
- "h.7.ln_1.weight": {
2012
  "type": "Distributed",
2013
  "shape": [
2014
  768
@@ -2035,11 +2030,11 @@
2035
  }
2036
  ]
2037
  },
2038
- "h.1.mlp.c_proj.weight": {
2039
  "type": "Distributed",
2040
  "shape": [
2041
- 3072,
2042
- 768
2043
  ],
2044
  "dtype": "F32",
2045
  "chunks": [
@@ -2049,79 +2044,89 @@
2049
  0
2050
  ],
2051
  "shape": [
2052
- 1536,
2053
- 768
2054
  ],
2055
  "filename_index": 0
2056
  },
2057
  {
2058
  "offsets": [
2059
- 1536,
2060
- 0
2061
  ],
2062
  "shape": [
2063
- 1536,
2064
- 768
2065
  ],
2066
  "filename_index": 1
2067
  }
2068
  ]
2069
  },
2070
- "h.3.ln_2.bias": {
2071
  "type": "Distributed",
2072
  "shape": [
 
2073
  768
2074
  ],
2075
  "dtype": "F32",
2076
  "chunks": [
2077
  {
2078
  "offsets": [
 
2079
  0
2080
  ],
2081
  "shape": [
2082
- 384
 
2083
  ],
2084
  "filename_index": 0
2085
  },
2086
  {
2087
  "offsets": [
2088
- 384
 
2089
  ],
2090
  "shape": [
2091
- 384
 
2092
  ],
2093
  "filename_index": 1
2094
  }
2095
  ]
2096
  },
2097
- "h.4.mlp.c_fc.bias": {
2098
  "type": "Distributed",
2099
  "shape": [
2100
- 3072
 
2101
  ],
2102
  "dtype": "F32",
2103
  "chunks": [
2104
  {
2105
  "offsets": [
 
2106
  0
2107
  ],
2108
  "shape": [
2109
- 1536
 
2110
  ],
2111
  "filename_index": 0
2112
  },
2113
  {
2114
  "offsets": [
2115
- 1536
 
2116
  ],
2117
  "shape": [
2118
- 1536
 
2119
  ],
2120
  "filename_index": 1
2121
  }
2122
  ]
2123
  },
2124
- "h.8.ln_2.weight": {
2125
  "type": "Distributed",
2126
  "shape": [
2127
  768
@@ -2148,11 +2153,11 @@
2148
  }
2149
  ]
2150
  },
2151
- "h.1.attn.c_proj.weight": {
2152
  "type": "Distributed",
2153
  "shape": [
2154
  768,
2155
- 768
2156
  ],
2157
  "dtype": "F32",
2158
  "chunks": [
@@ -2162,25 +2167,25 @@
2162
  0
2163
  ],
2164
  "shape": [
2165
- 384,
2166
- 768
2167
  ],
2168
  "filename_index": 0
2169
  },
2170
  {
2171
  "offsets": [
2172
- 384,
2173
- 0
2174
  ],
2175
  "shape": [
2176
- 384,
2177
- 768
2178
  ],
2179
  "filename_index": 1
2180
  }
2181
  ]
2182
  },
2183
- "h.6.ln_1.weight": {
2184
  "type": "Distributed",
2185
  "shape": [
2186
  768
@@ -2207,10 +2212,10 @@
2207
  }
2208
  ]
2209
  },
2210
- "h.1.attn.c_attn.bias": {
2211
  "type": "Distributed",
2212
  "shape": [
2213
- 2304
2214
  ],
2215
  "dtype": "F32",
2216
  "chunks": [
@@ -2219,76 +2224,86 @@
2219
  0
2220
  ],
2221
  "shape": [
2222
- 1152
2223
  ],
2224
  "filename_index": 0
2225
  },
2226
  {
2227
  "offsets": [
2228
- 1152
2229
  ],
2230
  "shape": [
2231
- 1152
2232
  ],
2233
  "filename_index": 1
2234
  }
2235
  ]
2236
  },
2237
- "h.3.mlp.c_fc.bias": {
2238
  "type": "Distributed",
2239
  "shape": [
2240
- 3072
 
2241
  ],
2242
  "dtype": "F32",
2243
  "chunks": [
2244
  {
2245
  "offsets": [
 
2246
  0
2247
  ],
2248
  "shape": [
2249
- 1536
 
2250
  ],
2251
  "filename_index": 0
2252
  },
2253
  {
2254
  "offsets": [
2255
- 1536
2256
- ],
 
2257
  "shape": [
2258
- 1536
 
2259
  ],
2260
  "filename_index": 1
2261
  }
2262
  ]
2263
  },
2264
- "h.1.ln_1.bias": {
2265
  "type": "Distributed",
2266
  "shape": [
2267
- 768
 
2268
  ],
2269
  "dtype": "F32",
2270
  "chunks": [
2271
  {
2272
  "offsets": [
 
2273
  0
2274
  ],
2275
  "shape": [
2276
- 384
 
2277
  ],
2278
  "filename_index": 0
2279
  },
2280
  {
2281
  "offsets": [
2282
- 384
 
2283
  ],
2284
  "shape": [
2285
- 384
 
2286
  ],
2287
  "filename_index": 1
2288
  }
2289
  ]
2290
  },
2291
- "h.9.ln_2.bias": {
2292
  "type": "Distributed",
2293
  "shape": [
2294
  768
@@ -2315,10 +2330,10 @@
2315
  }
2316
  ]
2317
  },
2318
- "h.8.ln_1.weight": {
2319
  "type": "Distributed",
2320
  "shape": [
2321
- 768
2322
  ],
2323
  "dtype": "F32",
2324
  "chunks": [
@@ -2327,54 +2342,49 @@
2327
  0
2328
  ],
2329
  "shape": [
2330
- 384
2331
  ],
2332
  "filename_index": 0
2333
  },
2334
  {
2335
  "offsets": [
2336
- 384
2337
  ],
2338
  "shape": [
2339
- 384
2340
  ],
2341
  "filename_index": 1
2342
  }
2343
  ]
2344
  },
2345
- "h.2.attn.c_proj.weight": {
2346
  "type": "Distributed",
2347
  "shape": [
2348
- 768,
2349
- 768
2350
  ],
2351
  "dtype": "F32",
2352
  "chunks": [
2353
  {
2354
  "offsets": [
2355
- 0,
2356
  0
2357
  ],
2358
  "shape": [
2359
- 384,
2360
- 768
2361
  ],
2362
  "filename_index": 0
2363
  },
2364
  {
2365
  "offsets": [
2366
- 384,
2367
- 0
2368
  ],
2369
  "shape": [
2370
- 384,
2371
- 768
2372
  ],
2373
  "filename_index": 1
2374
  }
2375
  ]
2376
  },
2377
- "h.8.attn.bias": {
2378
  "type": "Distributed",
2379
  "shape": [
2380
  1,
@@ -2416,11 +2426,11 @@
2416
  }
2417
  ]
2418
  },
2419
- "h.1.attn.c_attn.weight": {
2420
  "type": "Distributed",
2421
  "shape": [
2422
- 768,
2423
- 2304
2424
  ],
2425
  "dtype": "F32",
2426
  "chunks": [
@@ -2430,52 +2440,67 @@
2430
  0
2431
  ],
2432
  "shape": [
2433
- 768,
2434
- 1152
2435
  ],
2436
  "filename_index": 0
2437
  },
2438
  {
2439
  "offsets": [
2440
- 0,
2441
- 1152
2442
  ],
2443
  "shape": [
2444
- 768,
2445
- 1152
2446
  ],
2447
  "filename_index": 1
2448
  }
2449
  ]
2450
  },
2451
- "h.0.ln_1.weight": {
2452
  "type": "Distributed",
2453
  "shape": [
2454
- 768
 
 
 
2455
  ],
2456
  "dtype": "F32",
2457
  "chunks": [
2458
  {
2459
  "offsets": [
 
 
 
2460
  0
2461
  ],
2462
  "shape": [
2463
- 384
 
 
 
2464
  ],
2465
  "filename_index": 0
2466
  },
2467
  {
2468
  "offsets": [
2469
- 384
 
 
 
2470
  ],
2471
  "shape": [
2472
- 384
 
 
 
2473
  ],
2474
  "filename_index": 1
2475
  }
2476
  ]
2477
  },
2478
- "h.7.attn.c_proj.bias": {
2479
  "type": "Distributed",
2480
  "shape": [
2481
  768
@@ -2502,39 +2527,34 @@
2502
  }
2503
  ]
2504
  },
2505
- "h.10.mlp.c_fc.weight": {
2506
  "type": "Distributed",
2507
  "shape": [
2508
- 768,
2509
- 3072
2510
  ],
2511
  "dtype": "F32",
2512
  "chunks": [
2513
  {
2514
  "offsets": [
2515
- 0,
2516
  0
2517
  ],
2518
  "shape": [
2519
- 768,
2520
- 1536
2521
  ],
2522
  "filename_index": 0
2523
  },
2524
  {
2525
  "offsets": [
2526
- 0,
2527
- 1536
2528
  ],
2529
  "shape": [
2530
- 768,
2531
- 1536
2532
  ],
2533
  "filename_index": 1
2534
  }
2535
  ]
2536
  },
2537
- "h.2.mlp.c_proj.weight": {
2538
  "type": "Distributed",
2539
  "shape": [
2540
  3072,
@@ -2566,75 +2586,82 @@
2566
  }
2567
  ]
2568
  },
2569
- "h.7.attn.bias": {
2570
  "type": "Distributed",
2571
  "shape": [
2572
- 1,
2573
- 1,
2574
- 1024,
2575
- 1024
2576
  ],
2577
  "dtype": "F32",
2578
  "chunks": [
2579
  {
2580
  "offsets": [
2581
- 0,
2582
- 0,
2583
- 0,
2584
  0
2585
  ],
2586
  "shape": [
2587
- 1,
2588
- 1,
2589
- 1024,
2590
- 512
2591
  ],
2592
  "filename_index": 0
2593
  },
2594
  {
2595
  "offsets": [
2596
- 0,
2597
- 0,
2598
- 0,
2599
- 512
2600
  ],
2601
  "shape": [
2602
- 1,
2603
- 1,
2604
- 1024,
2605
- 512
2606
  ],
2607
  "filename_index": 1
2608
  }
2609
  ]
2610
  },
2611
- "h.2.attn.c_attn.weight": {
2612
  "type": "Distributed",
2613
  "shape": [
2614
- 768,
2615
- 2304
2616
  ],
2617
  "dtype": "F32",
2618
  "chunks": [
2619
  {
2620
  "offsets": [
2621
- 0,
2622
  0
2623
  ],
2624
  "shape": [
2625
- 768,
2626
- 1152
2627
  ],
2628
  "filename_index": 0
2629
  },
2630
  {
2631
  "offsets": [
2632
- 0,
2633
- 1152
2634
  ],
2635
  "shape": [
2636
- 768,
2637
- 1152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2638
  ],
2639
  "filename_index": 1
2640
  }
@@ -2672,42 +2699,52 @@
2672
  }
2673
  ]
2674
  },
2675
- "h.2.mlp.c_fc.weight": {
2676
  "type": "Distributed",
2677
  "shape": [
2678
- 768,
2679
- 3072
 
 
2680
  ],
2681
  "dtype": "F32",
2682
  "chunks": [
2683
  {
2684
  "offsets": [
 
 
2685
  0,
2686
  0
2687
  ],
2688
  "shape": [
2689
- 768,
2690
- 1536
 
 
2691
  ],
2692
  "filename_index": 0
2693
  },
2694
  {
2695
  "offsets": [
2696
  0,
2697
- 1536
 
 
2698
  ],
2699
  "shape": [
2700
- 768,
2701
- 1536
 
 
2702
  ],
2703
  "filename_index": 1
2704
  }
2705
  ]
2706
  },
2707
- "h.9.mlp.c_proj.weight": {
2708
  "type": "Distributed",
2709
  "shape": [
2710
- 3072,
2711
  768
2712
  ],
2713
  "dtype": "F32",
@@ -2718,28 +2755,28 @@
2718
  0
2719
  ],
2720
  "shape": [
2721
- 1536,
2722
  768
2723
  ],
2724
  "filename_index": 0
2725
  },
2726
  {
2727
  "offsets": [
2728
- 1536,
2729
  0
2730
  ],
2731
  "shape": [
2732
- 1536,
2733
  768
2734
  ],
2735
  "filename_index": 1
2736
  }
2737
  ]
2738
  },
2739
- "h.5.ln_2.weight": {
2740
  "type": "Distributed",
2741
  "shape": [
2742
- 768
2743
  ],
2744
  "dtype": "F32",
2745
  "chunks": [
@@ -2748,22 +2785,22 @@
2748
  0
2749
  ],
2750
  "shape": [
2751
- 384
2752
  ],
2753
  "filename_index": 0
2754
  },
2755
  {
2756
  "offsets": [
2757
- 384
2758
  ],
2759
  "shape": [
2760
- 384
2761
  ],
2762
  "filename_index": 1
2763
  }
2764
  ]
2765
  },
2766
- "h.10.attn.c_attn.weight": {
2767
  "type": "Distributed",
2768
  "shape": [
2769
  768,
@@ -2795,7 +2832,7 @@
2795
  }
2796
  ]
2797
  },
2798
- "h.9.attn.c_proj.bias": {
2799
  "type": "Distributed",
2800
  "shape": [
2801
  768
@@ -2822,49 +2859,34 @@
2822
  }
2823
  ]
2824
  },
2825
- "h.11.attn.bias": {
2826
  "type": "Distributed",
2827
  "shape": [
2828
- 1,
2829
- 1,
2830
- 1024,
2831
- 1024
2832
  ],
2833
  "dtype": "F32",
2834
  "chunks": [
2835
  {
2836
  "offsets": [
2837
- 0,
2838
- 0,
2839
- 0,
2840
  0
2841
  ],
2842
  "shape": [
2843
- 1,
2844
- 1,
2845
- 1024,
2846
- 512
2847
  ],
2848
  "filename_index": 0
2849
  },
2850
  {
2851
  "offsets": [
2852
- 0,
2853
- 0,
2854
- 0,
2855
- 512
2856
  ],
2857
  "shape": [
2858
- 1,
2859
- 1,
2860
- 1024,
2861
- 512
2862
  ],
2863
  "filename_index": 1
2864
  }
2865
  ]
2866
  },
2867
- "h.0.mlp.c_proj.bias": {
2868
  "type": "Distributed",
2869
  "shape": [
2870
  768
@@ -2891,7 +2913,7 @@
2891
  }
2892
  ]
2893
  },
2894
- "h.7.mlp.c_proj.bias": {
2895
  "type": "Distributed",
2896
  "shape": [
2897
  768
@@ -2918,10 +2940,10 @@
2918
  }
2919
  ]
2920
  },
2921
- "h.5.ln_2.bias": {
2922
  "type": "Distributed",
2923
  "shape": [
2924
- 768
2925
  ],
2926
  "dtype": "F32",
2927
  "chunks": [
@@ -2930,49 +2952,54 @@
2930
  0
2931
  ],
2932
  "shape": [
2933
- 384
2934
  ],
2935
  "filename_index": 0
2936
  },
2937
  {
2938
  "offsets": [
2939
- 384
2940
  ],
2941
  "shape": [
2942
- 384
2943
  ],
2944
  "filename_index": 1
2945
  }
2946
  ]
2947
  },
2948
- "h.1.ln_2.bias": {
2949
  "type": "Distributed",
2950
  "shape": [
2951
- 768
 
2952
  ],
2953
  "dtype": "F32",
2954
  "chunks": [
2955
  {
2956
  "offsets": [
 
2957
  0
2958
  ],
2959
  "shape": [
2960
- 384
 
2961
  ],
2962
  "filename_index": 0
2963
  },
2964
  {
2965
  "offsets": [
2966
- 384
 
2967
  ],
2968
  "shape": [
2969
- 384
 
2970
  ],
2971
  "filename_index": 1
2972
  }
2973
  ]
2974
  },
2975
- "h.4.attn.bias": {
2976
  "type": "Distributed",
2977
  "shape": [
2978
  1,
@@ -3014,34 +3041,7 @@
3014
  }
3015
  ]
3016
  },
3017
- "h.10.ln_1.weight": {
3018
- "type": "Distributed",
3019
- "shape": [
3020
- 768
3021
- ],
3022
- "dtype": "F32",
3023
- "chunks": [
3024
- {
3025
- "offsets": [
3026
- 0
3027
- ],
3028
- "shape": [
3029
- 384
3030
- ],
3031
- "filename_index": 0
3032
- },
3033
- {
3034
- "offsets": [
3035
- 384
3036
- ],
3037
- "shape": [
3038
- 384
3039
- ],
3040
- "filename_index": 1
3041
- }
3042
- ]
3043
- },
3044
- "h.10.mlp.c_proj.weight": {
3045
  "type": "Distributed",
3046
  "shape": [
3047
  3072,
@@ -3073,7 +3073,7 @@
3073
  }
3074
  ]
3075
  },
3076
- "h.4.attn.c_attn.weight": {
3077
  "type": "Distributed",
3078
  "shape": [
3079
  768,
@@ -3105,7 +3105,7 @@
3105
  }
3106
  ]
3107
  },
3108
- "h.11.mlp.c_fc.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
  768,
@@ -3137,71 +3137,61 @@
3137
  }
3138
  ]
3139
  },
3140
- "h.9.attn.c_attn.weight": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
- 768,
3144
- 2304
3145
  ],
3146
  "dtype": "F32",
3147
  "chunks": [
3148
  {
3149
  "offsets": [
3150
- 0,
3151
  0
3152
  ],
3153
  "shape": [
3154
- 768,
3155
- 1152
3156
  ],
3157
  "filename_index": 0
3158
  },
3159
  {
3160
  "offsets": [
3161
- 0,
3162
- 1152
3163
  ],
3164
  "shape": [
3165
- 768,
3166
- 1152
3167
  ],
3168
  "filename_index": 1
3169
  }
3170
  ]
3171
  },
3172
- "h.8.attn.c_attn.weight": {
3173
  "type": "Distributed",
3174
  "shape": [
3175
- 768,
3176
- 2304
3177
  ],
3178
  "dtype": "F32",
3179
  "chunks": [
3180
  {
3181
  "offsets": [
3182
- 0,
3183
  0
3184
  ],
3185
  "shape": [
3186
- 768,
3187
- 1152
3188
  ],
3189
  "filename_index": 0
3190
  },
3191
  {
3192
  "offsets": [
3193
- 0,
3194
- 1152
3195
  ],
3196
  "shape": [
3197
- 768,
3198
- 1152
3199
  ],
3200
  "filename_index": 1
3201
  }
3202
  ]
3203
  },
3204
- "h.0.attn.c_proj.bias": {
3205
  "type": "Distributed",
3206
  "shape": [
3207
  768
@@ -3228,7 +3218,7 @@
3228
  }
3229
  ]
3230
  },
3231
- "h.8.ln_2.bias": {
3232
  "type": "Distributed",
3233
  "shape": [
3234
  768
@@ -3255,7 +3245,39 @@
3255
  }
3256
  ]
3257
  },
3258
- "h.10.attn.c_proj.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3259
  "type": "Distributed",
3260
  "shape": [
3261
  768
@@ -3282,103 +3304,88 @@
3282
  }
3283
  ]
3284
  },
3285
- "h.5.attn.c_proj.weight": {
3286
  "type": "Distributed",
3287
  "shape": [
3288
- 768,
3289
  768
3290
  ],
3291
  "dtype": "F32",
3292
  "chunks": [
3293
  {
3294
  "offsets": [
3295
- 0,
3296
  0
3297
  ],
3298
  "shape": [
3299
- 384,
3300
- 768
3301
  ],
3302
  "filename_index": 0
3303
  },
3304
  {
3305
  "offsets": [
3306
- 384,
3307
- 0
3308
  ],
3309
  "shape": [
3310
- 384,
3311
- 768
3312
  ],
3313
  "filename_index": 1
3314
  }
3315
  ]
3316
  },
3317
- "h.4.mlp.c_fc.weight": {
3318
  "type": "Distributed",
3319
  "shape": [
3320
- 768,
3321
- 3072
3322
  ],
3323
  "dtype": "F32",
3324
  "chunks": [
3325
  {
3326
  "offsets": [
3327
- 0,
3328
  0
3329
  ],
3330
  "shape": [
3331
- 768,
3332
- 1536
3333
  ],
3334
  "filename_index": 0
3335
  },
3336
  {
3337
  "offsets": [
3338
- 0,
3339
- 1536
3340
  ],
3341
  "shape": [
3342
- 768,
3343
- 1536
3344
  ],
3345
  "filename_index": 1
3346
  }
3347
  ]
3348
  },
3349
- "wpe.weight": {
3350
  "type": "Distributed",
3351
  "shape": [
3352
- 1024,
3353
  768
3354
  ],
3355
  "dtype": "F32",
3356
  "chunks": [
3357
  {
3358
  "offsets": [
3359
- 0,
3360
  0
3361
  ],
3362
  "shape": [
3363
- 1024,
3364
  384
3365
  ],
3366
  "filename_index": 0
3367
  },
3368
  {
3369
  "offsets": [
3370
- 0,
3371
  384
3372
  ],
3373
  "shape": [
3374
- 1024,
3375
  384
3376
  ],
3377
  "filename_index": 1
3378
  }
3379
  ]
3380
  },
3381
- "h.6.ln_2.weight": {
3382
  "type": "Distributed",
3383
  "shape": [
3384
  768
@@ -3405,39 +3412,61 @@
3405
  }
3406
  ]
3407
  },
3408
- "h.10.attn.c_proj.weight": {
3409
  "type": "Distributed",
3410
  "shape": [
3411
- 768,
3412
- 768
3413
  ],
3414
  "dtype": "F32",
3415
  "chunks": [
3416
  {
3417
  "offsets": [
3418
- 0,
3419
  0
3420
  ],
3421
  "shape": [
3422
- 384,
3423
- 768
3424
  ],
3425
  "filename_index": 0
3426
  },
3427
  {
3428
  "offsets": [
3429
- 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3430
  0
3431
  ],
3432
  "shape": [
3433
- 384,
3434
- 768
 
 
 
 
 
 
 
 
3435
  ],
3436
  "filename_index": 1
3437
  }
3438
  ]
3439
  },
3440
- "h.3.attn.bias": {
3441
  "type": "Distributed",
3442
  "shape": [
3443
  1,
@@ -3479,37 +3508,52 @@
3479
  }
3480
  ]
3481
  },
3482
- "h.11.attn.c_attn.bias": {
3483
  "type": "Distributed",
3484
  "shape": [
3485
- 2304
 
 
 
3486
  ],
3487
  "dtype": "F32",
3488
  "chunks": [
3489
  {
3490
  "offsets": [
 
 
 
3491
  0
3492
  ],
3493
  "shape": [
3494
- 1152
 
 
 
3495
  ],
3496
  "filename_index": 0
3497
  },
3498
  {
3499
  "offsets": [
3500
- 1152
 
 
 
3501
  ],
3502
  "shape": [
3503
- 1152
 
 
 
3504
  ],
3505
  "filename_index": 1
3506
  }
3507
  ]
3508
  },
3509
- "h.2.ln_1.bias": {
3510
  "type": "Distributed",
3511
  "shape": [
3512
- 768
3513
  ],
3514
  "dtype": "F32",
3515
  "chunks": [
@@ -3518,22 +3562,22 @@
3518
  0
3519
  ],
3520
  "shape": [
3521
- 384
3522
  ],
3523
  "filename_index": 0
3524
  },
3525
  {
3526
  "offsets": [
3527
- 384
3528
  ],
3529
  "shape": [
3530
- 384
3531
  ],
3532
  "filename_index": 1
3533
  }
3534
  ]
3535
  },
3536
- "h.9.ln_1.weight": {
3537
  "type": "Distributed",
3538
  "shape": [
3539
  768
@@ -3560,108 +3604,98 @@
3560
  }
3561
  ]
3562
  },
3563
- "h.8.mlp.c_fc.weight": {
3564
  "type": "Distributed",
3565
  "shape": [
3566
- 768,
3567
- 3072
3568
  ],
3569
  "dtype": "F32",
3570
  "chunks": [
3571
  {
3572
  "offsets": [
3573
- 0,
3574
  0
3575
  ],
3576
  "shape": [
3577
- 768,
3578
- 1536
3579
  ],
3580
  "filename_index": 0
3581
  },
3582
  {
3583
  "offsets": [
3584
- 0,
3585
- 1536
3586
  ],
3587
  "shape": [
3588
- 768,
3589
- 1536
3590
  ],
3591
  "filename_index": 1
3592
  }
3593
  ]
3594
  },
3595
- "h.2.ln_2.weight": {
3596
  "type": "Distributed",
3597
  "shape": [
3598
- 768
 
3599
  ],
3600
  "dtype": "F32",
3601
  "chunks": [
3602
  {
3603
  "offsets": [
 
3604
  0
3605
  ],
3606
  "shape": [
3607
- 384
 
3608
  ],
3609
  "filename_index": 0
3610
  },
3611
  {
3612
  "offsets": [
3613
- 384
 
3614
  ],
3615
  "shape": [
3616
- 384
 
3617
  ],
3618
  "filename_index": 1
3619
  }
3620
  ]
3621
  },
3622
- "h.0.attn.bias": {
3623
  "type": "Distributed",
3624
  "shape": [
3625
- 1,
3626
- 1,
3627
- 1024,
3628
- 1024
3629
  ],
3630
  "dtype": "F32",
3631
  "chunks": [
3632
  {
3633
  "offsets": [
3634
- 0,
3635
- 0,
3636
  0,
3637
  0
3638
  ],
3639
  "shape": [
3640
- 1,
3641
- 1,
3642
- 1024,
3643
- 512
3644
  ],
3645
  "filename_index": 0
3646
  },
3647
  {
3648
  "offsets": [
3649
  0,
3650
- 0,
3651
- 0,
3652
- 512
3653
  ],
3654
  "shape": [
3655
- 1,
3656
- 1,
3657
- 1024,
3658
- 512
3659
  ],
3660
  "filename_index": 1
3661
  }
3662
  ]
3663
  },
3664
- "h.5.mlp.c_fc.weight": {
3665
  "type": "Distributed",
3666
  "shape": [
3667
  768,
@@ -3693,39 +3727,34 @@
3693
  }
3694
  ]
3695
  },
3696
- "h.11.mlp.c_proj.weight": {
3697
  "type": "Distributed",
3698
  "shape": [
3699
- 3072,
3700
  768
3701
  ],
3702
  "dtype": "F32",
3703
  "chunks": [
3704
  {
3705
  "offsets": [
3706
- 0,
3707
  0
3708
  ],
3709
  "shape": [
3710
- 1536,
3711
- 768
3712
  ],
3713
  "filename_index": 0
3714
  },
3715
  {
3716
  "offsets": [
3717
- 1536,
3718
- 0
3719
  ],
3720
  "shape": [
3721
- 1536,
3722
- 768
3723
  ],
3724
  "filename_index": 1
3725
  }
3726
  ]
3727
  },
3728
- "h.7.ln_1.bias": {
3729
  "type": "Distributed",
3730
  "shape": [
3731
  768
@@ -3752,10 +3781,10 @@
3752
  }
3753
  ]
3754
  },
3755
- "h.8.attn.c_attn.bias": {
3756
  "type": "Distributed",
3757
  "shape": [
3758
- 2304
3759
  ],
3760
  "dtype": "F32",
3761
  "chunks": [
@@ -3764,22 +3793,22 @@
3764
  0
3765
  ],
3766
  "shape": [
3767
- 1152
3768
  ],
3769
  "filename_index": 0
3770
  },
3771
  {
3772
  "offsets": [
3773
- 1152
3774
  ],
3775
  "shape": [
3776
- 1152
3777
  ],
3778
  "filename_index": 1
3779
  }
3780
  ]
3781
  },
3782
- "h.6.attn.c_proj.bias": {
3783
  "type": "Distributed",
3784
  "shape": [
3785
  768
@@ -3806,11 +3835,11 @@
3806
  }
3807
  ]
3808
  },
3809
- "h.3.mlp.c_fc.weight": {
3810
  "type": "Distributed",
3811
  "shape": [
3812
  768,
3813
- 3072
3814
  ],
3815
  "dtype": "F32",
3816
  "chunks": [
@@ -3820,55 +3849,60 @@
3820
  0
3821
  ],
3822
  "shape": [
3823
- 768,
3824
- 1536
3825
  ],
3826
  "filename_index": 0
3827
  },
3828
  {
3829
  "offsets": [
3830
- 0,
3831
- 1536
3832
  ],
3833
  "shape": [
3834
- 768,
3835
- 1536
3836
  ],
3837
  "filename_index": 1
3838
  }
3839
  ]
3840
  },
3841
- "h.4.ln_2.weight": {
3842
  "type": "Distributed",
3843
  "shape": [
 
3844
  768
3845
  ],
3846
  "dtype": "F32",
3847
  "chunks": [
3848
  {
3849
  "offsets": [
 
3850
  0
3851
  ],
3852
  "shape": [
3853
- 384
 
3854
  ],
3855
  "filename_index": 0
3856
  },
3857
  {
3858
  "offsets": [
3859
- 384
 
3860
  ],
3861
  "shape": [
3862
- 384
 
3863
  ],
3864
  "filename_index": 1
3865
  }
3866
  ]
3867
  },
3868
- "h.5.attn.c_attn.bias": {
3869
  "type": "Distributed",
3870
  "shape": [
3871
- 2304
3872
  ],
3873
  "dtype": "F32",
3874
  "chunks": [
@@ -3877,86 +3911,76 @@
3877
  0
3878
  ],
3879
  "shape": [
3880
- 1152
3881
  ],
3882
  "filename_index": 0
3883
  },
3884
  {
3885
  "offsets": [
3886
- 1152
3887
  ],
3888
  "shape": [
3889
- 1152
3890
  ],
3891
  "filename_index": 1
3892
  }
3893
  ]
3894
  },
3895
- "h.8.mlp.c_proj.weight": {
3896
  "type": "Distributed",
3897
  "shape": [
3898
- 3072,
3899
  768
3900
  ],
3901
  "dtype": "F32",
3902
  "chunks": [
3903
  {
3904
  "offsets": [
3905
- 0,
3906
  0
3907
  ],
3908
  "shape": [
3909
- 1536,
3910
- 768
3911
  ],
3912
  "filename_index": 0
3913
  },
3914
  {
3915
  "offsets": [
3916
- 1536,
3917
- 0
3918
  ],
3919
  "shape": [
3920
- 1536,
3921
- 768
3922
  ],
3923
  "filename_index": 1
3924
  }
3925
  ]
3926
  },
3927
- "h.5.mlp.c_proj.weight": {
3928
  "type": "Distributed",
3929
  "shape": [
3930
- 3072,
3931
  768
3932
  ],
3933
  "dtype": "F32",
3934
  "chunks": [
3935
  {
3936
  "offsets": [
3937
- 0,
3938
  0
3939
  ],
3940
  "shape": [
3941
- 1536,
3942
- 768
3943
  ],
3944
  "filename_index": 0
3945
  },
3946
  {
3947
  "offsets": [
3948
- 1536,
3949
- 0
3950
  ],
3951
  "shape": [
3952
- 1536,
3953
- 768
3954
  ],
3955
  "filename_index": 1
3956
  }
3957
  ]
3958
  },
3959
- "h.4.mlp.c_proj.bias": {
3960
  "type": "Distributed",
3961
  "shape": [
3962
  768
@@ -3983,66 +4007,76 @@
3983
  }
3984
  ]
3985
  },
3986
- "h.6.mlp.c_fc.weight": {
3987
  "type": "Distributed",
3988
  "shape": [
3989
- 768,
3990
- 3072
3991
  ],
3992
  "dtype": "F32",
3993
  "chunks": [
3994
  {
3995
  "offsets": [
3996
- 0,
3997
  0
3998
  ],
3999
  "shape": [
4000
- 768,
4001
- 1536
4002
  ],
4003
  "filename_index": 0
4004
  },
4005
  {
4006
  "offsets": [
4007
- 0,
4008
- 1536
4009
  ],
4010
  "shape": [
4011
- 768,
4012
- 1536
4013
  ],
4014
  "filename_index": 1
4015
  }
4016
  ]
4017
  },
4018
- "h.9.attn.c_attn.bias": {
4019
  "type": "Distributed",
4020
  "shape": [
4021
- 2304
 
 
 
4022
  ],
4023
  "dtype": "F32",
4024
  "chunks": [
4025
  {
4026
  "offsets": [
 
 
 
4027
  0
4028
  ],
4029
  "shape": [
4030
- 1152
 
 
 
4031
  ],
4032
  "filename_index": 0
4033
  },
4034
  {
4035
  "offsets": [
4036
- 1152
 
 
 
4037
  ],
4038
  "shape": [
4039
- 1152
 
 
 
4040
  ],
4041
  "filename_index": 1
4042
  }
4043
  ]
4044
  },
4045
- "h.8.attn.c_proj.bias": {
4046
  "type": "Distributed",
4047
  "shape": [
4048
  768
@@ -4069,37 +4103,42 @@
4069
  }
4070
  ]
4071
  },
4072
- "h.2.attn.c_attn.bias": {
4073
  "type": "Distributed",
4074
  "shape": [
 
4075
  2304
4076
  ],
4077
  "dtype": "F32",
4078
  "chunks": [
4079
  {
4080
  "offsets": [
 
4081
  0
4082
  ],
4083
  "shape": [
 
4084
  1152
4085
  ],
4086
  "filename_index": 0
4087
  },
4088
  {
4089
  "offsets": [
 
4090
  1152
4091
  ],
4092
  "shape": [
 
4093
  1152
4094
  ],
4095
  "filename_index": 1
4096
  }
4097
  ]
4098
  },
4099
- "h.0.mlp.c_proj.weight": {
4100
  "type": "Distributed",
4101
  "shape": [
4102
- 3072,
4103
  768
4104
  ],
4105
  "dtype": "F32",
@@ -4110,79 +4149,57 @@
4110
  0
4111
  ],
4112
  "shape": [
4113
- 1536,
4114
  768
4115
  ],
4116
  "filename_index": 0
4117
  },
4118
  {
4119
  "offsets": [
4120
- 1536,
4121
  0
4122
  ],
4123
  "shape": [
4124
- 1536,
4125
  768
4126
  ],
4127
  "filename_index": 1
4128
  }
4129
  ]
4130
  },
4131
- "h.11.ln_1.weight": {
4132
  "type": "Distributed",
4133
  "shape": [
 
4134
  768
4135
  ],
4136
  "dtype": "F32",
4137
  "chunks": [
4138
  {
4139
  "offsets": [
 
4140
  0
4141
  ],
4142
  "shape": [
4143
- 384
 
4144
  ],
4145
  "filename_index": 0
4146
  },
4147
  {
4148
  "offsets": [
4149
- 384
4150
- ],
4151
- "shape": [
4152
- 384
4153
- ],
4154
- "filename_index": 1
4155
- }
4156
- ]
4157
- },
4158
- "h.3.ln_1.bias": {
4159
- "type": "Distributed",
4160
- "shape": [
4161
- 768
4162
- ],
4163
- "dtype": "F32",
4164
- "chunks": [
4165
- {
4166
- "offsets": [
4167
  0
4168
  ],
4169
  "shape": [
4170
- 384
4171
- ],
4172
- "filename_index": 0
4173
- },
4174
- {
4175
- "offsets": [
4176
- 384
4177
- ],
4178
- "shape": [
4179
- 384
4180
  ],
4181
  "filename_index": 1
4182
  }
4183
  ]
4184
  },
4185
- "h.10.attn.bias": {
4186
  "type": "Distributed",
4187
  "shape": [
4188
  1,
@@ -4224,10 +4241,10 @@
4224
  }
4225
  ]
4226
  },
4227
- "h.10.ln_1.bias": {
4228
  "type": "Distributed",
4229
  "shape": [
4230
- 768
4231
  ],
4232
  "dtype": "F32",
4233
  "chunks": [
@@ -4236,25 +4253,25 @@
4236
  0
4237
  ],
4238
  "shape": [
4239
- 384
4240
  ],
4241
  "filename_index": 0
4242
  },
4243
  {
4244
  "offsets": [
4245
- 384
4246
  ],
4247
  "shape": [
4248
- 384
4249
  ],
4250
  "filename_index": 1
4251
  }
4252
  ]
4253
  },
4254
- "h.5.ln_1.weight": {
4255
  "type": "Distributed",
4256
  "shape": [
4257
- 768
4258
  ],
4259
  "dtype": "F32",
4260
  "chunks": [
@@ -4263,76 +4280,86 @@
4263
  0
4264
  ],
4265
  "shape": [
4266
- 384
4267
  ],
4268
  "filename_index": 0
4269
  },
4270
  {
4271
  "offsets": [
4272
- 384
4273
  ],
4274
  "shape": [
4275
- 384
4276
  ],
4277
  "filename_index": 1
4278
  }
4279
  ]
4280
  },
4281
- "h.0.ln_2.weight": {
4282
  "type": "Distributed",
4283
  "shape": [
 
4284
  768
4285
  ],
4286
  "dtype": "F32",
4287
  "chunks": [
4288
  {
4289
  "offsets": [
 
4290
  0
4291
  ],
4292
  "shape": [
4293
- 384
 
4294
  ],
4295
  "filename_index": 0
4296
  },
4297
  {
4298
  "offsets": [
4299
- 384
 
4300
  ],
4301
  "shape": [
4302
- 384
 
4303
  ],
4304
  "filename_index": 1
4305
  }
4306
  ]
4307
  },
4308
- "h.0.mlp.c_fc.bias": {
4309
  "type": "Distributed",
4310
  "shape": [
 
4311
  3072
4312
  ],
4313
  "dtype": "F32",
4314
  "chunks": [
4315
  {
4316
  "offsets": [
 
4317
  0
4318
  ],
4319
  "shape": [
 
4320
  1536
4321
  ],
4322
  "filename_index": 0
4323
  },
4324
  {
4325
  "offsets": [
 
4326
  1536
4327
  ],
4328
  "shape": [
 
4329
  1536
4330
  ],
4331
  "filename_index": 1
4332
  }
4333
  ]
4334
  },
4335
- "h.11.ln_2.bias": {
4336
  "type": "Distributed",
4337
  "shape": [
4338
  768
@@ -4359,71 +4386,34 @@
4359
  }
4360
  ]
4361
  },
4362
- "h.1.mlp.c_fc.weight": {
4363
- "type": "Distributed",
4364
- "shape": [
4365
- 768,
4366
- 3072
4367
- ],
4368
- "dtype": "F32",
4369
- "chunks": [
4370
- {
4371
- "offsets": [
4372
- 0,
4373
- 0
4374
- ],
4375
- "shape": [
4376
- 768,
4377
- 1536
4378
- ],
4379
- "filename_index": 0
4380
- },
4381
- {
4382
- "offsets": [
4383
- 0,
4384
- 1536
4385
- ],
4386
- "shape": [
4387
- 768,
4388
- 1536
4389
- ],
4390
- "filename_index": 1
4391
- }
4392
- ]
4393
- },
4394
- "h.7.mlp.c_fc.weight": {
4395
  "type": "Distributed",
4396
  "shape": [
4397
- 768,
4398
  3072
4399
  ],
4400
  "dtype": "F32",
4401
  "chunks": [
4402
  {
4403
  "offsets": [
4404
- 0,
4405
  0
4406
  ],
4407
  "shape": [
4408
- 768,
4409
  1536
4410
  ],
4411
  "filename_index": 0
4412
  },
4413
  {
4414
  "offsets": [
4415
- 0,
4416
  1536
4417
  ],
4418
  "shape": [
4419
- 768,
4420
  1536
4421
  ],
4422
  "filename_index": 1
4423
  }
4424
  ]
4425
  },
4426
- "h.8.ln_1.bias": {
4427
  "type": "Distributed",
4428
  "shape": [
4429
  768
@@ -4450,49 +4440,34 @@
4450
  }
4451
  ]
4452
  },
4453
- "h.5.attn.bias": {
4454
  "type": "Distributed",
4455
  "shape": [
4456
- 1,
4457
- 1,
4458
- 1024,
4459
- 1024
4460
  ],
4461
  "dtype": "F32",
4462
  "chunks": [
4463
  {
4464
  "offsets": [
4465
- 0,
4466
- 0,
4467
- 0,
4468
  0
4469
  ],
4470
  "shape": [
4471
- 1,
4472
- 1,
4473
- 1024,
4474
- 512
4475
  ],
4476
  "filename_index": 0
4477
  },
4478
  {
4479
  "offsets": [
4480
- 0,
4481
- 0,
4482
- 0,
4483
- 512
4484
  ],
4485
  "shape": [
4486
- 1,
4487
- 1,
4488
- 1024,
4489
- 512
4490
  ],
4491
  "filename_index": 1
4492
  }
4493
  ]
4494
  },
4495
- "h.7.ln_2.weight": {
4496
  "type": "Distributed",
4497
  "shape": [
4498
  768
@@ -4519,38 +4494,43 @@
4519
  }
4520
  ]
4521
  },
4522
- "h.4.attn.c_proj.bias": {
4523
  "type": "Distributed",
4524
  "shape": [
4525
- 768
 
4526
  ],
4527
  "dtype": "F32",
4528
  "chunks": [
4529
  {
4530
  "offsets": [
 
4531
  0
4532
  ],
4533
  "shape": [
4534
- 384
 
4535
  ],
4536
  "filename_index": 0
4537
  },
4538
  {
4539
  "offsets": [
4540
- 384
 
4541
  ],
4542
  "shape": [
4543
- 384
 
4544
  ],
4545
  "filename_index": 1
4546
  }
4547
  ]
4548
  },
4549
- "h.0.mlp.c_fc.weight": {
4550
  "type": "Distributed",
4551
  "shape": [
4552
  768,
4553
- 3072
4554
  ],
4555
  "dtype": "F32",
4556
  "chunks": [
@@ -4560,25 +4540,25 @@
4560
  0
4561
  ],
4562
  "shape": [
4563
- 768,
4564
- 1536
4565
  ],
4566
  "filename_index": 0
4567
  },
4568
  {
4569
  "offsets": [
4570
- 0,
4571
- 1536
4572
  ],
4573
  "shape": [
4574
- 768,
4575
- 1536
4576
  ],
4577
  "filename_index": 1
4578
  }
4579
  ]
4580
  },
4581
- "h.7.mlp.c_fc.bias": {
4582
  "type": "Distributed",
4583
  "shape": [
4584
  3072
@@ -4605,10 +4585,10 @@
4605
  }
4606
  ]
4607
  },
4608
- "h.11.attn.c_proj.weight": {
4609
  "type": "Distributed",
4610
  "shape": [
4611
- 768,
4612
  768
4613
  ],
4614
  "dtype": "F32",
@@ -4619,111 +4599,131 @@
4619
  0
4620
  ],
4621
  "shape": [
4622
- 384,
4623
  768
4624
  ],
4625
  "filename_index": 0
4626
  },
4627
  {
4628
  "offsets": [
4629
- 384,
4630
  0
4631
  ],
4632
  "shape": [
4633
- 384,
4634
  768
4635
  ],
4636
  "filename_index": 1
4637
  }
4638
  ]
4639
  },
4640
- "h.8.mlp.c_proj.bias": {
4641
  "type": "Distributed",
4642
  "shape": [
4643
- 768
 
4644
  ],
4645
  "dtype": "F32",
4646
  "chunks": [
4647
  {
4648
  "offsets": [
 
4649
  0
4650
  ],
4651
  "shape": [
4652
- 384
 
4653
  ],
4654
  "filename_index": 0
4655
  },
4656
  {
4657
  "offsets": [
4658
- 384
 
4659
  ],
4660
  "shape": [
4661
- 384
 
4662
  ],
4663
  "filename_index": 1
4664
  }
4665
  ]
4666
  },
4667
- "h.1.mlp.c_fc.bias": {
4668
  "type": "Distributed",
4669
  "shape": [
4670
- 3072
 
4671
  ],
4672
  "dtype": "F32",
4673
  "chunks": [
4674
  {
4675
  "offsets": [
 
4676
  0
4677
  ],
4678
  "shape": [
4679
- 1536
 
4680
  ],
4681
  "filename_index": 0
4682
  },
4683
  {
4684
  "offsets": [
4685
- 1536
 
4686
  ],
4687
  "shape": [
4688
- 1536
 
4689
  ],
4690
  "filename_index": 1
4691
  }
4692
  ]
4693
  },
4694
- "h.6.mlp.c_proj.weight": {
4695
  "type": "Distributed",
4696
  "shape": [
4697
- 3072,
4698
- 768
 
 
4699
  ],
4700
  "dtype": "F32",
4701
  "chunks": [
4702
  {
4703
  "offsets": [
 
 
4704
  0,
4705
  0
4706
  ],
4707
  "shape": [
4708
- 1536,
4709
- 768
 
 
4710
  ],
4711
  "filename_index": 0
4712
  },
4713
  {
4714
  "offsets": [
4715
- 1536,
4716
- 0
 
 
4717
  ],
4718
  "shape": [
4719
- 1536,
4720
- 768
 
 
4721
  ],
4722
  "filename_index": 1
4723
  }
4724
  ]
4725
  },
4726
- "h.2.attn.c_proj.bias": {
4727
  "type": "Distributed",
4728
  "shape": [
4729
  768
 
1
  {
2
  "tensors": {
3
+ "h.5.attn.c_proj.weight": {
4
  "type": "Distributed",
5
  "shape": [
6
+ 768,
7
  768
8
  ],
9
  "dtype": "F32",
10
  "chunks": [
11
  {
12
  "offsets": [
13
+ 0,
14
  0
15
  ],
16
  "shape": [
17
+ 384,
18
+ 768
19
  ],
20
  "filename_index": 0
21
  },
22
  {
23
  "offsets": [
24
+ 384,
25
+ 0
26
  ],
27
  "shape": [
28
+ 384,
29
+ 768
30
  ],
31
  "filename_index": 1
32
  }
33
  ]
34
  },
35
+ "h.7.ln_2.weight": {
36
  "type": "Distributed",
37
  "shape": [
38
  768
 
59
  }
60
  ]
61
  },
62
+ "h.8.attn.c_attn.weight": {
63
  "type": "Distributed",
64
  "shape": [
65
  768,
66
+ 2304
67
  ],
68
  "dtype": "F32",
69
  "chunks": [
 
73
  0
74
  ],
75
  "shape": [
76
+ 768,
77
+ 1152
78
  ],
79
  "filename_index": 0
80
  },
81
  {
82
  "offsets": [
83
+ 0,
84
+ 1152
85
  ],
86
  "shape": [
87
+ 768,
88
+ 1152
89
  ],
90
  "filename_index": 1
91
  }
92
  ]
93
  },
94
+ "h.11.mlp.c_fc.bias": {
95
  "type": "Distributed",
96
  "shape": [
97
+ 3072
98
  ],
99
  "dtype": "F32",
100
  "chunks": [
 
103
  0
104
  ],
105
  "shape": [
106
+ 1536
107
  ],
108
  "filename_index": 0
109
  },
110
  {
111
  "offsets": [
112
+ 1536
113
  ],
114
  "shape": [
115
+ 1536
116
  ],
117
  "filename_index": 1
118
  }
119
  ]
120
  },
121
+ "h.11.ln_1.weight": {
122
  "type": "Distributed",
123
  "shape": [
124
+ 768
 
 
 
125
  ],
126
  "dtype": "F32",
127
  "chunks": [
128
  {
129
  "offsets": [
 
 
 
130
  0
131
  ],
132
  "shape": [
133
+ 384
 
 
 
134
  ],
135
  "filename_index": 0
136
  },
137
  {
138
  "offsets": [
139
+ 384
 
 
 
140
  ],
141
  "shape": [
142
+ 384
 
 
 
143
  ],
144
  "filename_index": 1
145
  }
146
  ]
147
  },
148
+ "h.9.mlp.c_fc.bias": {
149
  "type": "Distributed",
150
  "shape": [
151
+ 3072
152
  ],
153
  "dtype": "F32",
154
  "chunks": [
 
157
  0
158
  ],
159
  "shape": [
160
+ 1536
161
  ],
162
  "filename_index": 0
163
  },
164
  {
165
  "offsets": [
166
+ 1536
167
  ],
168
  "shape": [
169
+ 1536
170
  ],
171
  "filename_index": 1
172
  }
173
  ]
174
  },
175
+ "h.3.ln_2.weight": {
176
  "type": "Distributed",
177
  "shape": [
178
  768
 
199
  }
200
  ]
201
  },
202
+ "h.6.mlp.c_proj.bias": {
203
  "type": "Distributed",
204
  "shape": [
205
+ 768
206
  ],
207
  "dtype": "F32",
208
  "chunks": [
 
211
  0
212
  ],
213
  "shape": [
214
+ 384
215
  ],
216
  "filename_index": 0
217
  },
218
  {
219
  "offsets": [
220
+ 384
221
  ],
222
  "shape": [
223
+ 384
224
  ],
225
  "filename_index": 1
226
  }
227
  ]
228
  },
229
+ "h.1.ln_1.weight": {
230
  "type": "Distributed",
231
  "shape": [
232
  768
 
253
  }
254
  ]
255
  },
256
+ "h.1.attn.bias": {
257
  "type": "Distributed",
258
  "shape": [
259
+ 1,
260
+ 1,
261
+ 1024,
262
+ 1024
263
  ],
264
  "dtype": "F32",
265
  "chunks": [
266
  {
267
  "offsets": [
268
+ 0,
269
+ 0,
270
+ 0,
271
  0
272
  ],
273
  "shape": [
274
+ 1,
275
+ 1,
276
+ 1024,
277
+ 512
278
  ],
279
  "filename_index": 0
280
  },
281
  {
282
  "offsets": [
283
+ 0,
284
+ 0,
285
+ 0,
286
+ 512
287
  ],
288
  "shape": [
289
+ 1,
290
+ 1,
291
+ 1024,
292
+ 512
293
  ],
294
  "filename_index": 1
295
  }
296
  ]
297
  },
298
+ "h.1.mlp.c_fc.weight": {
299
  "type": "Distributed",
300
  "shape": [
301
+ 768,
302
+ 3072
303
  ],
304
  "dtype": "F32",
305
  "chunks": [
306
  {
307
  "offsets": [
308
+ 0,
309
  0
310
  ],
311
  "shape": [
312
+ 768,
313
+ 1536
314
  ],
315
  "filename_index": 0
316
  },
317
  {
318
  "offsets": [
319
+ 0,
320
+ 1536
321
  ],
322
  "shape": [
323
+ 768,
324
+ 1536
325
  ],
326
  "filename_index": 1
327
  }
328
  ]
329
  },
330
+ "h.6.ln_1.weight": {
331
  "type": "Distributed",
332
  "shape": [
333
+ 768
334
  ],
335
  "dtype": "F32",
336
  "chunks": [
 
339
  0
340
  ],
341
  "shape": [
342
+ 384
343
  ],
344
  "filename_index": 0
345
  },
346
  {
347
  "offsets": [
348
+ 384
349
  ],
350
  "shape": [
351
+ 384
352
  ],
353
  "filename_index": 1
354
  }
355
  ]
356
  },
357
+ "ln_f.weight": {
358
  "type": "Distributed",
359
  "shape": [
 
360
  768
361
  ],
362
  "dtype": "F32",
363
  "chunks": [
364
  {
365
  "offsets": [
 
366
  0
367
  ],
368
  "shape": [
369
+ 384
 
370
  ],
371
  "filename_index": 0
372
  },
373
  {
374
  "offsets": [
375
+ 384
 
376
  ],
377
  "shape": [
378
+ 384
 
379
  ],
380
  "filename_index": 1
381
  }
382
  ]
383
  },
384
+ "h.10.ln_2.bias": {
385
  "type": "Distributed",
386
  "shape": [
387
+ 768
 
388
  ],
389
  "dtype": "F32",
390
  "chunks": [
391
  {
392
  "offsets": [
 
393
  0
394
  ],
395
  "shape": [
396
+ 384
 
397
  ],
398
  "filename_index": 0
399
  },
400
  {
401
  "offsets": [
402
+ 384
 
403
  ],
404
  "shape": [
405
+ 384
 
406
  ],
407
  "filename_index": 1
408
  }
409
  ]
410
  },
411
+ "h.5.attn.c_proj.bias": {
412
  "type": "Distributed",
413
  "shape": [
414
  768
 
435
  }
436
  ]
437
  },
438
+ "h.10.mlp.c_fc.weight": {
439
  "type": "Distributed",
440
  "shape": [
441
+ 768,
442
+ 3072
443
  ],
444
  "dtype": "F32",
445
  "chunks": [
446
  {
447
  "offsets": [
448
+ 0,
449
  0
450
  ],
451
  "shape": [
452
+ 768,
453
+ 1536
454
  ],
455
  "filename_index": 0
456
  },
457
  {
458
  "offsets": [
459
+ 0,
460
+ 1536
461
  ],
462
  "shape": [
463
+ 768,
464
+ 1536
465
  ],
466
  "filename_index": 1
467
  }
468
  ]
469
  },
470
+ "h.2.mlp.c_fc.weight": {
471
  "type": "Distributed",
472
  "shape": [
473
+ 768,
474
  3072
475
  ],
476
  "dtype": "F32",
477
  "chunks": [
478
  {
479
  "offsets": [
480
+ 0,
481
  0
482
  ],
483
  "shape": [
484
+ 768,
485
  1536
486
  ],
487
  "filename_index": 0
488
  },
489
  {
490
  "offsets": [
491
+ 0,
492
  1536
493
  ],
494
  "shape": [
495
+ 768,
496
  1536
497
  ],
498
  "filename_index": 1
499
  }
500
  ]
501
  },
502
+ "h.4.attn.c_attn.bias": {
503
  "type": "Distributed",
504
  "shape": [
505
+ 2304
506
  ],
507
  "dtype": "F32",
508
  "chunks": [
 
511
  0
512
  ],
513
  "shape": [
514
+ 1152
515
  ],
516
  "filename_index": 0
517
  },
518
  {
519
  "offsets": [
520
+ 1152
521
  ],
522
  "shape": [
523
+ 1152
524
  ],
525
  "filename_index": 1
526
  }
527
  ]
528
  },
529
+ "h.11.attn.c_attn.bias": {
530
  "type": "Distributed",
531
  "shape": [
532
+ 2304
533
  ],
534
  "dtype": "F32",
535
  "chunks": [
 
538
  0
539
  ],
540
  "shape": [
541
+ 1152
542
  ],
543
  "filename_index": 0
544
  },
545
  {
546
  "offsets": [
547
+ 1152
548
  ],
549
  "shape": [
550
+ 1152
551
  ],
552
  "filename_index": 1
553
  }
554
  ]
555
  },
556
+ "h.11.ln_1.bias": {
557
  "type": "Distributed",
558
  "shape": [
 
559
  768
560
  ],
561
  "dtype": "F32",
562
  "chunks": [
563
  {
564
  "offsets": [
 
565
  0
566
  ],
567
  "shape": [
568
+ 384
 
569
  ],
570
  "filename_index": 0
571
  },
572
  {
573
  "offsets": [
574
+ 384
 
575
  ],
576
  "shape": [
577
+ 384
 
578
  ],
579
  "filename_index": 1
580
  }
581
  ]
582
  },
583
+ "h.0.ln_1.weight": {
584
  "type": "Distributed",
585
  "shape": [
586
  768
 
607
  }
608
  ]
609
  },
610
+ "h.9.ln_2.weight": {
611
  "type": "Distributed",
612
  "shape": [
613
  768
 
634
  }
635
  ]
636
  },
637
+ "h.9.attn.c_attn.bias": {
638
  "type": "Distributed",
639
  "shape": [
640
  2304
 
661
  }
662
  ]
663
  },
664
+ "h.2.attn.bias": {
665
  "type": "Distributed",
666
  "shape": [
667
+ 1,
668
+ 1,
669
+ 1024,
670
+ 1024
671
  ],
672
  "dtype": "F32",
673
  "chunks": [
674
  {
675
  "offsets": [
676
+ 0,
677
+ 0,
678
+ 0,
679
  0
680
  ],
681
  "shape": [
682
+ 1,
683
+ 1,
684
+ 1024,
685
+ 512
686
  ],
687
  "filename_index": 0
688
  },
689
  {
690
  "offsets": [
691
+ 0,
692
+ 0,
693
+ 0,
694
+ 512
695
  ],
696
  "shape": [
697
+ 1,
698
+ 1,
699
+ 1024,
700
+ 512
701
  ],
702
  "filename_index": 1
703
  }
704
  ]
705
  },
706
+ "h.0.mlp.c_fc.bias": {
707
  "type": "Distributed",
708
  "shape": [
709
  3072
 
730
  }
731
  ]
732
  },
733
+ "h.0.attn.c_attn.bias": {
734
  "type": "Distributed",
735
  "shape": [
736
+ 2304
737
  ],
738
  "dtype": "F32",
739
  "chunks": [
 
742
  0
743
  ],
744
  "shape": [
745
+ 1152
746
  ],
747
  "filename_index": 0
748
  },
749
  {
750
  "offsets": [
751
+ 1152
752
  ],
753
  "shape": [
754
+ 1152
755
  ],
756
  "filename_index": 1
757
  }
758
  ]
759
  },
760
+ "h.2.mlp.c_proj.weight": {
761
  "type": "Distributed",
762
  "shape": [
763
+ 3072,
764
+ 768
765
  ],
766
  "dtype": "F32",
767
  "chunks": [
768
  {
769
  "offsets": [
770
+ 0,
771
  0
772
  ],
773
  "shape": [
774
+ 1536,
775
+ 768
776
  ],
777
  "filename_index": 0
778
  },
779
  {
780
  "offsets": [
781
+ 1536,
782
+ 0
783
  ],
784
  "shape": [
785
+ 1536,
786
+ 768
787
  ],
788
  "filename_index": 1
789
  }
790
  ]
791
  },
792
+ "h.1.attn.c_attn.bias": {
793
  "type": "Distributed",
794
  "shape": [
 
795
  2304
796
  ],
797
  "dtype": "F32",
798
  "chunks": [
799
  {
800
  "offsets": [
 
801
  0
802
  ],
803
  "shape": [
 
804
  1152
805
  ],
806
  "filename_index": 0
807
  },
808
  {
809
  "offsets": [
 
810
  1152
811
  ],
812
  "shape": [
 
813
  1152
814
  ],
815
  "filename_index": 1
816
  }
817
  ]
818
  },
819
+ "h.3.mlp.c_proj.weight": {
820
  "type": "Distributed",
821
  "shape": [
822
+ 3072,
823
+ 768
 
 
824
  ],
825
  "dtype": "F32",
826
  "chunks": [
827
  {
828
  "offsets": [
 
 
829
  0,
830
  0
831
  ],
832
  "shape": [
833
+ 1536,
834
+ 768
 
 
835
  ],
836
  "filename_index": 0
837
  },
838
  {
839
  "offsets": [
840
+ 1536,
841
+ 0
 
 
842
  ],
843
  "shape": [
844
+ 1536,
845
+ 768
 
 
846
  ],
847
  "filename_index": 1
848
  }
849
  ]
850
  },
851
+ "h.5.attn.c_attn.bias": {
852
  "type": "Distributed",
853
  "shape": [
854
+ 2304
855
  ],
856
  "dtype": "F32",
857
  "chunks": [
 
860
  0
861
  ],
862
  "shape": [
863
+ 1152
864
  ],
865
  "filename_index": 0
866
  },
867
  {
868
  "offsets": [
869
+ 1152
870
  ],
871
  "shape": [
872
+ 1152
873
  ],
874
  "filename_index": 1
875
  }
876
  ]
877
  },
878
+ "h.0.attn.c_attn.weight": {
879
  "type": "Distributed",
880
  "shape": [
881
+ 768,
882
+ 2304
 
 
883
  ],
884
  "dtype": "F32",
885
  "chunks": [
886
  {
887
  "offsets": [
 
 
888
  0,
889
  0
890
  ],
891
  "shape": [
892
+ 768,
893
+ 1152
 
 
894
  ],
895
  "filename_index": 0
896
  },
897
  {
898
  "offsets": [
899
  0,
900
+ 1152
 
 
901
  ],
902
  "shape": [
903
+ 768,
904
+ 1152
 
 
905
  ],
906
  "filename_index": 1
907
  }
908
  ]
909
  },
910
+ "h.2.attn.c_proj.bias": {
911
  "type": "Distributed",
912
  "shape": [
913
  768
 
934
  }
935
  ]
936
  },
937
+ "h.7.attn.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
  768
 
961
  }
962
  ]
963
  },
964
+ "h.9.ln_1.bias": {
965
  "type": "Distributed",
966
  "shape": [
 
967
  768
968
  ],
969
  "dtype": "F32",
970
  "chunks": [
971
  {
972
  "offsets": [
 
973
  0
974
  ],
975
  "shape": [
976
+ 384
 
977
  ],
978
  "filename_index": 0
979
  },
980
  {
981
  "offsets": [
982
+ 384
 
983
  ],
984
  "shape": [
985
+ 384
 
986
  ],
987
  "filename_index": 1
988
  }
989
  ]
990
  },
991
+ "h.2.mlp.c_proj.bias": {
992
  "type": "Distributed",
993
  "shape": [
994
  768
 
1015
  }
1016
  ]
1017
  },
1018
+ "h.10.ln_1.bias": {
1019
  "type": "Distributed",
1020
  "shape": [
 
1021
  768
1022
  ],
1023
  "dtype": "F32",
1024
  "chunks": [
1025
  {
1026
  "offsets": [
 
1027
  0
1028
  ],
1029
  "shape": [
1030
+ 384
 
1031
  ],
1032
  "filename_index": 0
1033
  },
1034
  {
1035
  "offsets": [
1036
+ 384
 
1037
  ],
1038
  "shape": [
1039
+ 384
 
1040
  ],
1041
  "filename_index": 1
1042
  }
1043
  ]
1044
  },
1045
+ "h.10.mlp.c_proj.weight": {
1046
  "type": "Distributed",
1047
  "shape": [
1048
+ 3072,
1049
+ 768
1050
  ],
1051
  "dtype": "F32",
1052
  "chunks": [
 
1056
  0
1057
  ],
1058
  "shape": [
1059
+ 1536,
1060
+ 768
1061
  ],
1062
  "filename_index": 0
1063
  },
1064
  {
1065
  "offsets": [
1066
+ 1536,
1067
+ 0
1068
  ],
1069
  "shape": [
1070
+ 1536,
1071
+ 768
1072
  ],
1073
  "filename_index": 1
1074
  }
1075
  ]
1076
  },
1077
+ "h.6.attn.c_proj.bias": {
1078
  "type": "Distributed",
1079
  "shape": [
1080
  768
 
1101
  }
1102
  ]
1103
  },
1104
+ "h.8.ln_1.weight": {
1105
  "type": "Distributed",
1106
  "shape": [
1107
+ 768
 
1108
  ],
1109
  "dtype": "F32",
1110
  "chunks": [
1111
  {
1112
  "offsets": [
 
1113
  0
1114
  ],
1115
  "shape": [
1116
+ 384
 
1117
  ],
1118
  "filename_index": 0
1119
  },
1120
  {
1121
  "offsets": [
1122
+ 384
 
1123
  ],
1124
  "shape": [
1125
+ 384
 
1126
  ],
1127
  "filename_index": 1
1128
  }
1129
  ]
1130
  },
1131
+ "h.10.attn.c_attn.bias": {
1132
  "type": "Distributed",
1133
  "shape": [
1134
+ 2304
1135
  ],
1136
  "dtype": "F32",
1137
  "chunks": [
 
1140
  0
1141
  ],
1142
  "shape": [
1143
+ 1152
1144
  ],
1145
  "filename_index": 0
1146
  },
1147
  {
1148
  "offsets": [
1149
+ 1152
1150
  ],
1151
  "shape": [
1152
+ 1152
1153
  ],
1154
  "filename_index": 1
1155
  }
1156
  ]
1157
  },
1158
+ "h.6.mlp.c_fc.weight": {
1159
  "type": "Distributed",
1160
  "shape": [
1161
+ 768,
1162
+ 3072
1163
  ],
1164
  "dtype": "F32",
1165
  "chunks": [
1166
  {
1167
  "offsets": [
1168
+ 0,
1169
  0
1170
  ],
1171
  "shape": [
1172
+ 768,
1173
+ 1536
1174
  ],
1175
  "filename_index": 0
1176
  },
1177
  {
1178
  "offsets": [
1179
+ 0,
1180
+ 1536
1181
  ],
1182
  "shape": [
1183
+ 768,
1184
+ 1536
1185
  ],
1186
  "filename_index": 1
1187
  }
1188
  ]
1189
  },
1190
+ "h.6.ln_2.bias": {
1191
  "type": "Distributed",
1192
  "shape": [
1193
+ 768
1194
  ],
1195
  "dtype": "F32",
1196
  "chunks": [
 
1199
  0
1200
  ],
1201
  "shape": [
1202
+ 384
1203
  ],
1204
  "filename_index": 0
1205
  },
1206
  {
1207
  "offsets": [
1208
+ 384
1209
  ],
1210
  "shape": [
1211
+ 384
1212
  ],
1213
  "filename_index": 1
1214
  }
1215
  ]
1216
  },
1217
+ "h.1.ln_2.weight": {
1218
  "type": "Distributed",
1219
  "shape": [
1220
  768
 
1241
  }
1242
  ]
1243
  },
1244
+ "h.0.ln_1.bias": {
1245
  "type": "Distributed",
1246
  "shape": [
1247
  768
 
1268
  }
1269
  ]
1270
  },
1271
+ "h.4.ln_2.bias": {
1272
  "type": "Distributed",
1273
  "shape": [
 
1274
  768
1275
  ],
1276
  "dtype": "F32",
1277
  "chunks": [
1278
  {
1279
  "offsets": [
 
1280
  0
1281
  ],
1282
  "shape": [
1283
+ 384
 
1284
  ],
1285
  "filename_index": 0
1286
  },
1287
  {
1288
  "offsets": [
1289
+ 384
 
1290
  ],
1291
  "shape": [
1292
+ 384
 
1293
  ],
1294
  "filename_index": 1
1295
  }
1296
  ]
1297
  },
1298
+ "wte.weight": {
1299
  "type": "Distributed",
1300
  "shape": [
1301
+ 50257,
1302
+ 768
1303
  ],
1304
  "dtype": "F32",
1305
  "chunks": [
 
1309
  0
1310
  ],
1311
  "shape": [
1312
+ 50257,
1313
+ 384
1314
  ],
1315
  "filename_index": 0
1316
  },
1317
  {
1318
  "offsets": [
1319
  0,
1320
+ 384
1321
  ],
1322
  "shape": [
1323
+ 50257,
1324
+ 384
1325
  ],
1326
  "filename_index": 1
1327
  }
1328
  ]
1329
  },
1330
+ "h.3.ln_1.bias": {
1331
  "type": "Distributed",
1332
  "shape": [
1333
  768
 
1386
  }
1387
  ]
1388
  },
1389
+ "h.2.ln_2.bias": {
1390
  "type": "Distributed",
1391
  "shape": [
1392
+ 768
1393
  ],
1394
  "dtype": "F32",
1395
  "chunks": [
 
1398
  0
1399
  ],
1400
  "shape": [
1401
+ 384
1402
  ],
1403
  "filename_index": 0
1404
  },
1405
  {
1406
  "offsets": [
1407
+ 384
1408
  ],
1409
  "shape": [
1410
+ 384
1411
  ],
1412
  "filename_index": 1
1413
  }
1414
  ]
1415
  },
1416
+ "h.7.ln_2.bias": {
1417
  "type": "Distributed",
1418
  "shape": [
1419
+ 768
1420
  ],
1421
  "dtype": "F32",
1422
  "chunks": [
 
1425
  0
1426
  ],
1427
  "shape": [
1428
+ 384
1429
  ],
1430
  "filename_index": 0
1431
  },
1432
  {
1433
  "offsets": [
1434
+ 384
1435
  ],
1436
  "shape": [
1437
+ 384
1438
  ],
1439
  "filename_index": 1
1440
  }
1441
  ]
1442
  },
1443
+ "h.4.mlp.c_proj.bias": {
1444
  "type": "Distributed",
1445
  "shape": [
1446
  768
 
1467
  }
1468
  ]
1469
  },
1470
+ "h.7.mlp.c_proj.weight": {
1471
  "type": "Distributed",
1472
  "shape": [
1473
+ 3072,
1474
+ 768
1475
  ],
1476
  "dtype": "F32",
1477
  "chunks": [
 
1481
  0
1482
  ],
1483
  "shape": [
1484
+ 1536,
1485
+ 768
1486
  ],
1487
  "filename_index": 0
1488
  },
1489
  {
1490
  "offsets": [
1491
+ 1536,
1492
+ 0
1493
  ],
1494
  "shape": [
1495
+ 1536,
1496
+ 768
1497
  ],
1498
  "filename_index": 1
1499
  }
1500
  ]
1501
  },
1502
+ "h.3.attn.c_proj.weight": {
1503
  "type": "Distributed",
1504
  "shape": [
1505
  768,
1506
+ 768
1507
  ],
1508
  "dtype": "F32",
1509
  "chunks": [
 
1513
  0
1514
  ],
1515
  "shape": [
1516
+ 384,
1517
+ 768
1518
  ],
1519
  "filename_index": 0
1520
  },
1521
  {
1522
  "offsets": [
1523
+ 384,
1524
+ 0
1525
  ],
1526
  "shape": [
1527
+ 384,
1528
+ 768
1529
  ],
1530
  "filename_index": 1
1531
  }
1532
  ]
1533
  },
1534
+ "h.5.attn.bias": {
1535
  "type": "Distributed",
1536
  "shape": [
1537
  1,
 
1573
  }
1574
  ]
1575
  },
1576
+ "h.8.mlp.c_proj.weight": {
1577
  "type": "Distributed",
1578
  "shape": [
1579
+ 3072,
1580
  768
1581
  ],
1582
  "dtype": "F32",
 
1587
  0
1588
  ],
1589
  "shape": [
1590
+ 1536,
1591
+ 768
1592
  ],
1593
  "filename_index": 0
1594
  },
1595
  {
1596
  "offsets": [
1597
+ 1536,
1598
+ 0
1599
  ],
1600
  "shape": [
1601
+ 1536,
1602
+ 768
1603
  ],
1604
  "filename_index": 1
1605
  }
1606
  ]
1607
  },
1608
+ "h.4.ln_2.weight": {
1609
  "type": "Distributed",
1610
  "shape": [
1611
  768
 
1632
  }
1633
  ]
1634
  },
1635
+ "h.10.attn.c_proj.weight": {
1636
  "type": "Distributed",
1637
  "shape": [
1638
+ 768,
1639
+ 768
1640
  ],
1641
  "dtype": "F32",
1642
  "chunks": [
1643
  {
1644
  "offsets": [
1645
+ 0,
1646
  0
1647
  ],
1648
  "shape": [
1649
+ 384,
1650
+ 768
1651
  ],
1652
  "filename_index": 0
1653
  },
1654
  {
1655
  "offsets": [
1656
+ 384,
1657
+ 0
1658
  ],
1659
  "shape": [
1660
+ 384,
1661
+ 768
1662
  ],
1663
  "filename_index": 1
1664
  }
1665
  ]
1666
  },
1667
+ "h.0.ln_2.weight": {
1668
  "type": "Distributed",
1669
  "shape": [
1670
  768
 
1691
  }
1692
  ]
1693
  },
1694
+ "h.4.ln_1.bias": {
1695
  "type": "Distributed",
1696
  "shape": [
1697
  768
 
1718
  }
1719
  ]
1720
  },
1721
+ "h.7.ln_1.weight": {
1722
  "type": "Distributed",
1723
  "shape": [
1724
+ 768
1725
  ],
1726
  "dtype": "F32",
1727
  "chunks": [
 
1730
  0
1731
  ],
1732
  "shape": [
1733
+ 384
1734
  ],
1735
  "filename_index": 0
1736
  },
1737
  {
1738
  "offsets": [
1739
+ 384
1740
  ],
1741
  "shape": [
1742
+ 384
1743
  ],
1744
  "filename_index": 1
1745
  }
1746
  ]
1747
  },
1748
+ "h.5.mlp.c_proj.bias": {
1749
  "type": "Distributed",
1750
  "shape": [
1751
  768
 
1772
  }
1773
  ]
1774
  },
1775
+ "h.1.mlp.c_fc.bias": {
1776
  "type": "Distributed",
1777
  "shape": [
1778
+ 3072
1779
  ],
1780
  "dtype": "F32",
1781
  "chunks": [
 
1784
  0
1785
  ],
1786
  "shape": [
1787
+ 1536
1788
  ],
1789
  "filename_index": 0
1790
  },
1791
  {
1792
  "offsets": [
1793
+ 1536
1794
  ],
1795
  "shape": [
1796
+ 1536
1797
  ],
1798
  "filename_index": 1
1799
  }
1800
  ]
1801
  },
1802
+ "h.1.mlp.c_proj.bias": {
1803
  "type": "Distributed",
1804
  "shape": [
 
1805
  768
1806
  ],
1807
  "dtype": "F32",
1808
  "chunks": [
1809
  {
1810
  "offsets": [
 
1811
  0
1812
  ],
1813
  "shape": [
1814
+ 384
 
1815
  ],
1816
  "filename_index": 0
1817
  },
1818
  {
1819
  "offsets": [
1820
+ 384
 
1821
  ],
1822
  "shape": [
1823
+ 384
 
1824
  ],
1825
  "filename_index": 1
1826
  }
1827
  ]
1828
  },
1829
+ "h.10.ln_1.weight": {
1830
  "type": "Distributed",
1831
  "shape": [
1832
  768
 
1853
  }
1854
  ]
1855
  },
1856
+ "h.3.attn.c_attn.weight": {
1857
  "type": "Distributed",
1858
  "shape": [
1859
  768,
1860
+ 2304
1861
  ],
1862
  "dtype": "F32",
1863
  "chunks": [
 
1867
  0
1868
  ],
1869
  "shape": [
1870
+ 768,
1871
+ 1152
1872
  ],
1873
  "filename_index": 0
1874
  },
1875
  {
1876
  "offsets": [
1877
+ 0,
1878
+ 1152
1879
  ],
1880
  "shape": [
1881
+ 768,
1882
+ 1152
1883
  ],
1884
  "filename_index": 1
1885
  }
1886
  ]
1887
  },
1888
+ "h.1.attn.c_proj.bias": {
1889
  "type": "Distributed",
1890
  "shape": [
1891
  768
 
1912
  }
1913
  ]
1914
  },
1915
+ "h.8.attn.c_proj.weight": {
1916
  "type": "Distributed",
1917
  "shape": [
1918
+ 768,
1919
  768
1920
  ],
1921
  "dtype": "F32",
1922
  "chunks": [
1923
  {
1924
  "offsets": [
1925
+ 0,
1926
  0
1927
  ],
1928
  "shape": [
1929
+ 384,
1930
+ 768
1931
  ],
1932
  "filename_index": 0
1933
  },
1934
  {
1935
  "offsets": [
1936
+ 384,
1937
+ 0
1938
  ],
1939
  "shape": [
1940
+ 384,
1941
+ 768
1942
  ],
1943
  "filename_index": 1
1944
  }
1945
  ]
1946
  },
1947
+ "h.8.mlp.c_fc.weight": {
1948
  "type": "Distributed",
1949
  "shape": [
1950
+ 768,
1951
+ 3072
1952
  ],
1953
  "dtype": "F32",
1954
  "chunks": [
1955
  {
1956
  "offsets": [
1957
+ 0,
1958
  0
1959
  ],
1960
  "shape": [
1961
+ 768,
1962
+ 1536
1963
  ],
1964
  "filename_index": 0
1965
  },
1966
  {
1967
  "offsets": [
1968
+ 0,
1969
+ 1536
1970
  ],
1971
  "shape": [
1972
+ 768,
1973
+ 1536
1974
  ],
1975
  "filename_index": 1
1976
  }
1977
  ]
1978
  },
1979
+ "h.6.mlp.c_fc.bias": {
1980
  "type": "Distributed",
1981
  "shape": [
1982
+ 3072
1983
  ],
1984
  "dtype": "F32",
1985
  "chunks": [
 
1988
  0
1989
  ],
1990
  "shape": [
1991
+ 1536
1992
  ],
1993
  "filename_index": 0
1994
  },
1995
  {
1996
  "offsets": [
1997
+ 1536
1998
  ],
1999
  "shape": [
2000
+ 1536
2001
  ],
2002
  "filename_index": 1
2003
  }
2004
  ]
2005
  },
2006
+ "h.7.mlp.c_proj.bias": {
2007
  "type": "Distributed",
2008
  "shape": [
2009
  768
 
2030
  }
2031
  ]
2032
  },
2033
+ "h.0.mlp.c_fc.weight": {
2034
  "type": "Distributed",
2035
  "shape": [
2036
+ 768,
2037
+ 3072
2038
  ],
2039
  "dtype": "F32",
2040
  "chunks": [
 
2044
  0
2045
  ],
2046
  "shape": [
2047
+ 768,
2048
+ 1536
2049
  ],
2050
  "filename_index": 0
2051
  },
2052
  {
2053
  "offsets": [
2054
+ 0,
2055
+ 1536
2056
  ],
2057
  "shape": [
2058
+ 768,
2059
+ 1536
2060
  ],
2061
  "filename_index": 1
2062
  }
2063
  ]
2064
  },
2065
+ "h.11.mlp.c_proj.weight": {
2066
  "type": "Distributed",
2067
  "shape": [
2068
+ 3072,
2069
  768
2070
  ],
2071
  "dtype": "F32",
2072
  "chunks": [
2073
  {
2074
  "offsets": [
2075
+ 0,
2076
  0
2077
  ],
2078
  "shape": [
2079
+ 1536,
2080
+ 768
2081
  ],
2082
  "filename_index": 0
2083
  },
2084
  {
2085
  "offsets": [
2086
+ 1536,
2087
+ 0
2088
  ],
2089
  "shape": [
2090
+ 1536,
2091
+ 768
2092
  ],
2093
  "filename_index": 1
2094
  }
2095
  ]
2096
  },
2097
+ "h.9.attn.c_proj.weight": {
2098
  "type": "Distributed",
2099
  "shape": [
2100
+ 768,
2101
+ 768
2102
  ],
2103
  "dtype": "F32",
2104
  "chunks": [
2105
  {
2106
  "offsets": [
2107
+ 0,
2108
  0
2109
  ],
2110
  "shape": [
2111
+ 384,
2112
+ 768
2113
  ],
2114
  "filename_index": 0
2115
  },
2116
  {
2117
  "offsets": [
2118
+ 384,
2119
+ 0
2120
  ],
2121
  "shape": [
2122
+ 384,
2123
+ 768
2124
  ],
2125
  "filename_index": 1
2126
  }
2127
  ]
2128
  },
2129
+ "h.3.ln_2.bias": {
2130
  "type": "Distributed",
2131
  "shape": [
2132
  768
 
2153
  }
2154
  ]
2155
  },
2156
+ "h.7.mlp.c_fc.weight": {
2157
  "type": "Distributed",
2158
  "shape": [
2159
  768,
2160
+ 3072
2161
  ],
2162
  "dtype": "F32",
2163
  "chunks": [
 
2167
  0
2168
  ],
2169
  "shape": [
2170
+ 768,
2171
+ 1536
2172
  ],
2173
  "filename_index": 0
2174
  },
2175
  {
2176
  "offsets": [
2177
+ 0,
2178
+ 1536
2179
  ],
2180
  "shape": [
2181
+ 768,
2182
+ 1536
2183
  ],
2184
  "filename_index": 1
2185
  }
2186
  ]
2187
  },
2188
+ "h.0.attn.c_proj.bias": {
2189
  "type": "Distributed",
2190
  "shape": [
2191
  768
 
2212
  }
2213
  ]
2214
  },
2215
+ "h.1.ln_2.bias": {
2216
  "type": "Distributed",
2217
  "shape": [
2218
+ 768
2219
  ],
2220
  "dtype": "F32",
2221
  "chunks": [
 
2224
  0
2225
  ],
2226
  "shape": [
2227
+ 384
2228
  ],
2229
  "filename_index": 0
2230
  },
2231
  {
2232
  "offsets": [
2233
+ 384
2234
  ],
2235
  "shape": [
2236
+ 384
2237
  ],
2238
  "filename_index": 1
2239
  }
2240
  ]
2241
  },
2242
+ "wpe.weight": {
2243
  "type": "Distributed",
2244
  "shape": [
2245
+ 1024,
2246
+ 768
2247
  ],
2248
  "dtype": "F32",
2249
  "chunks": [
2250
  {
2251
  "offsets": [
2252
+ 0,
2253
  0
2254
  ],
2255
  "shape": [
2256
+ 1024,
2257
+ 384
2258
  ],
2259
  "filename_index": 0
2260
  },
2261
  {
2262
  "offsets": [
2263
+ 0,
2264
+ 384
2265
+ ],
2266
  "shape": [
2267
+ 1024,
2268
+ 384
2269
  ],
2270
  "filename_index": 1
2271
  }
2272
  ]
2273
  },
2274
+ "h.9.mlp.c_fc.weight": {
2275
  "type": "Distributed",
2276
  "shape": [
2277
+ 768,
2278
+ 3072
2279
  ],
2280
  "dtype": "F32",
2281
  "chunks": [
2282
  {
2283
  "offsets": [
2284
+ 0,
2285
  0
2286
  ],
2287
  "shape": [
2288
+ 768,
2289
+ 1536
2290
  ],
2291
  "filename_index": 0
2292
  },
2293
  {
2294
  "offsets": [
2295
+ 0,
2296
+ 1536
2297
  ],
2298
  "shape": [
2299
+ 768,
2300
+ 1536
2301
  ],
2302
  "filename_index": 1
2303
  }
2304
  ]
2305
  },
2306
+ "h.5.ln_2.weight": {
2307
  "type": "Distributed",
2308
  "shape": [
2309
  768
 
2330
  }
2331
  ]
2332
  },
2333
+ "h.6.attn.c_attn.bias": {
2334
  "type": "Distributed",
2335
  "shape": [
2336
+ 2304
2337
  ],
2338
  "dtype": "F32",
2339
  "chunks": [
 
2342
  0
2343
  ],
2344
  "shape": [
2345
+ 1152
2346
  ],
2347
  "filename_index": 0
2348
  },
2349
  {
2350
  "offsets": [
2351
+ 1152
2352
  ],
2353
  "shape": [
2354
+ 1152
2355
  ],
2356
  "filename_index": 1
2357
  }
2358
  ]
2359
  },
2360
+ "h.3.mlp.c_fc.bias": {
2361
  "type": "Distributed",
2362
  "shape": [
2363
+ 3072
 
2364
  ],
2365
  "dtype": "F32",
2366
  "chunks": [
2367
  {
2368
  "offsets": [
 
2369
  0
2370
  ],
2371
  "shape": [
2372
+ 1536
 
2373
  ],
2374
  "filename_index": 0
2375
  },
2376
  {
2377
  "offsets": [
2378
+ 1536
 
2379
  ],
2380
  "shape": [
2381
+ 1536
 
2382
  ],
2383
  "filename_index": 1
2384
  }
2385
  ]
2386
  },
2387
+ "h.11.attn.bias": {
2388
  "type": "Distributed",
2389
  "shape": [
2390
  1,
 
2426
  }
2427
  ]
2428
  },
2429
+ "h.0.mlp.c_proj.weight": {
2430
  "type": "Distributed",
2431
  "shape": [
2432
+ 3072,
2433
+ 768
2434
  ],
2435
  "dtype": "F32",
2436
  "chunks": [
 
2440
  0
2441
  ],
2442
  "shape": [
2443
+ 1536,
2444
+ 768
2445
  ],
2446
  "filename_index": 0
2447
  },
2448
  {
2449
  "offsets": [
2450
+ 1536,
2451
+ 0
2452
  ],
2453
  "shape": [
2454
+ 1536,
2455
+ 768
2456
  ],
2457
  "filename_index": 1
2458
  }
2459
  ]
2460
  },
2461
+ "h.4.attn.bias": {
2462
  "type": "Distributed",
2463
  "shape": [
2464
+ 1,
2465
+ 1,
2466
+ 1024,
2467
+ 1024
2468
  ],
2469
  "dtype": "F32",
2470
  "chunks": [
2471
  {
2472
  "offsets": [
2473
+ 0,
2474
+ 0,
2475
+ 0,
2476
  0
2477
  ],
2478
  "shape": [
2479
+ 1,
2480
+ 1,
2481
+ 1024,
2482
+ 512
2483
  ],
2484
  "filename_index": 0
2485
  },
2486
  {
2487
  "offsets": [
2488
+ 0,
2489
+ 0,
2490
+ 0,
2491
+ 512
2492
  ],
2493
  "shape": [
2494
+ 1,
2495
+ 1,
2496
+ 1024,
2497
+ 512
2498
  ],
2499
  "filename_index": 1
2500
  }
2501
  ]
2502
  },
2503
+ "h.10.mlp.c_proj.bias": {
2504
  "type": "Distributed",
2505
  "shape": [
2506
  768
 
2527
  }
2528
  ]
2529
  },
2530
+ "h.5.ln_1.bias": {
2531
  "type": "Distributed",
2532
  "shape": [
2533
+ 768
 
2534
  ],
2535
  "dtype": "F32",
2536
  "chunks": [
2537
  {
2538
  "offsets": [
 
2539
  0
2540
  ],
2541
  "shape": [
2542
+ 384
 
2543
  ],
2544
  "filename_index": 0
2545
  },
2546
  {
2547
  "offsets": [
2548
+ 384
 
2549
  ],
2550
  "shape": [
2551
+ 384
 
2552
  ],
2553
  "filename_index": 1
2554
  }
2555
  ]
2556
  },
2557
+ "h.1.mlp.c_proj.weight": {
2558
  "type": "Distributed",
2559
  "shape": [
2560
  3072,
 
2586
  }
2587
  ]
2588
  },
2589
+ "h.6.ln_2.weight": {
2590
  "type": "Distributed",
2591
  "shape": [
2592
+ 768
 
 
 
2593
  ],
2594
  "dtype": "F32",
2595
  "chunks": [
2596
  {
2597
  "offsets": [
 
 
 
2598
  0
2599
  ],
2600
  "shape": [
2601
+ 384
 
 
 
2602
  ],
2603
  "filename_index": 0
2604
  },
2605
  {
2606
  "offsets": [
2607
+ 384
 
 
 
2608
  ],
2609
  "shape": [
2610
+ 384
 
 
 
2611
  ],
2612
  "filename_index": 1
2613
  }
2614
  ]
2615
  },
2616
+ "h.8.ln_1.bias": {
2617
  "type": "Distributed",
2618
  "shape": [
2619
+ 768
 
2620
  ],
2621
  "dtype": "F32",
2622
  "chunks": [
2623
  {
2624
  "offsets": [
 
2625
  0
2626
  ],
2627
  "shape": [
2628
+ 384
 
2629
  ],
2630
  "filename_index": 0
2631
  },
2632
  {
2633
  "offsets": [
2634
+ 384
 
2635
  ],
2636
  "shape": [
2637
+ 384
2638
+ ],
2639
+ "filename_index": 1
2640
+ }
2641
+ ]
2642
+ },
2643
+ "h.8.ln_2.bias": {
2644
+ "type": "Distributed",
2645
+ "shape": [
2646
+ 768
2647
+ ],
2648
+ "dtype": "F32",
2649
+ "chunks": [
2650
+ {
2651
+ "offsets": [
2652
+ 0
2653
+ ],
2654
+ "shape": [
2655
+ 384
2656
+ ],
2657
+ "filename_index": 0
2658
+ },
2659
+ {
2660
+ "offsets": [
2661
+ 384
2662
+ ],
2663
+ "shape": [
2664
+ 384
2665
  ],
2666
  "filename_index": 1
2667
  }
 
2699
  }
2700
  ]
2701
  },
2702
+ "h.7.attn.bias": {
2703
  "type": "Distributed",
2704
  "shape": [
2705
+ 1,
2706
+ 1,
2707
+ 1024,
2708
+ 1024
2709
  ],
2710
  "dtype": "F32",
2711
  "chunks": [
2712
  {
2713
  "offsets": [
2714
+ 0,
2715
+ 0,
2716
  0,
2717
  0
2718
  ],
2719
  "shape": [
2720
+ 1,
2721
+ 1,
2722
+ 1024,
2723
+ 512
2724
  ],
2725
  "filename_index": 0
2726
  },
2727
  {
2728
  "offsets": [
2729
  0,
2730
+ 0,
2731
+ 0,
2732
+ 512
2733
  ],
2734
  "shape": [
2735
+ 1,
2736
+ 1,
2737
+ 1024,
2738
+ 512
2739
  ],
2740
  "filename_index": 1
2741
  }
2742
  ]
2743
  },
2744
+ "h.1.attn.c_proj.weight": {
2745
  "type": "Distributed",
2746
  "shape": [
2747
+ 768,
2748
  768
2749
  ],
2750
  "dtype": "F32",
 
2755
  0
2756
  ],
2757
  "shape": [
2758
+ 384,
2759
  768
2760
  ],
2761
  "filename_index": 0
2762
  },
2763
  {
2764
  "offsets": [
2765
+ 384,
2766
  0
2767
  ],
2768
  "shape": [
2769
+ 384,
2770
  768
2771
  ],
2772
  "filename_index": 1
2773
  }
2774
  ]
2775
  },
2776
+ "h.7.attn.c_attn.bias": {
2777
  "type": "Distributed",
2778
  "shape": [
2779
+ 2304
2780
  ],
2781
  "dtype": "F32",
2782
  "chunks": [
 
2785
  0
2786
  ],
2787
  "shape": [
2788
+ 1152
2789
  ],
2790
  "filename_index": 0
2791
  },
2792
  {
2793
  "offsets": [
2794
+ 1152
2795
  ],
2796
  "shape": [
2797
+ 1152
2798
  ],
2799
  "filename_index": 1
2800
  }
2801
  ]
2802
  },
2803
+ "h.6.attn.c_attn.weight": {
2804
  "type": "Distributed",
2805
  "shape": [
2806
  768,
 
2832
  }
2833
  ]
2834
  },
2835
+ "h.11.ln_2.weight": {
2836
  "type": "Distributed",
2837
  "shape": [
2838
  768
 
2859
  }
2860
  ]
2861
  },
2862
+ "h.3.attn.c_proj.bias": {
2863
  "type": "Distributed",
2864
  "shape": [
2865
+ 768
 
 
 
2866
  ],
2867
  "dtype": "F32",
2868
  "chunks": [
2869
  {
2870
  "offsets": [
 
 
 
2871
  0
2872
  ],
2873
  "shape": [
2874
+ 384
 
 
 
2875
  ],
2876
  "filename_index": 0
2877
  },
2878
  {
2879
  "offsets": [
2880
+ 384
 
 
 
2881
  ],
2882
  "shape": [
2883
+ 384
 
 
 
2884
  ],
2885
  "filename_index": 1
2886
  }
2887
  ]
2888
  },
2889
+ "h.9.ln_2.bias": {
2890
  "type": "Distributed",
2891
  "shape": [
2892
  768
 
2913
  }
2914
  ]
2915
  },
2916
+ "h.9.attn.c_proj.bias": {
2917
  "type": "Distributed",
2918
  "shape": [
2919
  768
 
2940
  }
2941
  ]
2942
  },
2943
+ "h.5.mlp.c_fc.bias": {
2944
  "type": "Distributed",
2945
  "shape": [
2946
+ 3072
2947
  ],
2948
  "dtype": "F32",
2949
  "chunks": [
 
2952
  0
2953
  ],
2954
  "shape": [
2955
+ 1536
2956
  ],
2957
  "filename_index": 0
2958
  },
2959
  {
2960
  "offsets": [
2961
+ 1536
2962
  ],
2963
  "shape": [
2964
+ 1536
2965
  ],
2966
  "filename_index": 1
2967
  }
2968
  ]
2969
  },
2970
+ "h.2.attn.c_attn.weight": {
2971
  "type": "Distributed",
2972
  "shape": [
2973
+ 768,
2974
+ 2304
2975
  ],
2976
  "dtype": "F32",
2977
  "chunks": [
2978
  {
2979
  "offsets": [
2980
+ 0,
2981
  0
2982
  ],
2983
  "shape": [
2984
+ 768,
2985
+ 1152
2986
  ],
2987
  "filename_index": 0
2988
  },
2989
  {
2990
  "offsets": [
2991
+ 0,
2992
+ 1152
2993
  ],
2994
  "shape": [
2995
+ 768,
2996
+ 1152
2997
  ],
2998
  "filename_index": 1
2999
  }
3000
  ]
3001
  },
3002
+ "h.10.attn.bias": {
3003
  "type": "Distributed",
3004
  "shape": [
3005
  1,
 
3041
  }
3042
  ]
3043
  },
3044
+ "h.9.mlp.c_proj.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3045
  "type": "Distributed",
3046
  "shape": [
3047
  3072,
 
3073
  }
3074
  ]
3075
  },
3076
+ "h.9.attn.c_attn.weight": {
3077
  "type": "Distributed",
3078
  "shape": [
3079
  768,
 
3105
  }
3106
  ]
3107
  },
3108
+ "h.5.mlp.c_fc.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
  768,
 
3137
  }
3138
  ]
3139
  },
3140
+ "h.4.ln_1.weight": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
+ 768
 
3144
  ],
3145
  "dtype": "F32",
3146
  "chunks": [
3147
  {
3148
  "offsets": [
 
3149
  0
3150
  ],
3151
  "shape": [
3152
+ 384
 
3153
  ],
3154
  "filename_index": 0
3155
  },
3156
  {
3157
  "offsets": [
3158
+ 384
 
3159
  ],
3160
  "shape": [
3161
+ 384
 
3162
  ],
3163
  "filename_index": 1
3164
  }
3165
  ]
3166
  },
3167
+ "h.5.ln_1.weight": {
3168
  "type": "Distributed",
3169
  "shape": [
3170
+ 768
 
3171
  ],
3172
  "dtype": "F32",
3173
  "chunks": [
3174
  {
3175
  "offsets": [
 
3176
  0
3177
  ],
3178
  "shape": [
3179
+ 384
 
3180
  ],
3181
  "filename_index": 0
3182
  },
3183
  {
3184
  "offsets": [
3185
+ 384
 
3186
  ],
3187
  "shape": [
3188
+ 384
 
3189
  ],
3190
  "filename_index": 1
3191
  }
3192
  ]
3193
  },
3194
+ "h.3.ln_1.weight": {
3195
  "type": "Distributed",
3196
  "shape": [
3197
  768
 
3218
  }
3219
  ]
3220
  },
3221
+ "h.11.attn.c_proj.bias": {
3222
  "type": "Distributed",
3223
  "shape": [
3224
  768
 
3245
  }
3246
  ]
3247
  },
3248
+ "h.11.attn.c_attn.weight": {
3249
+ "type": "Distributed",
3250
+ "shape": [
3251
+ 768,
3252
+ 2304
3253
+ ],
3254
+ "dtype": "F32",
3255
+ "chunks": [
3256
+ {
3257
+ "offsets": [
3258
+ 0,
3259
+ 0
3260
+ ],
3261
+ "shape": [
3262
+ 768,
3263
+ 1152
3264
+ ],
3265
+ "filename_index": 0
3266
+ },
3267
+ {
3268
+ "offsets": [
3269
+ 0,
3270
+ 1152
3271
+ ],
3272
+ "shape": [
3273
+ 768,
3274
+ 1152
3275
+ ],
3276
+ "filename_index": 1
3277
+ }
3278
+ ]
3279
+ },
3280
+ "h.1.ln_1.bias": {
3281
  "type": "Distributed",
3282
  "shape": [
3283
  768
 
3304
  }
3305
  ]
3306
  },
3307
+ "h.0.ln_2.bias": {
3308
  "type": "Distributed",
3309
  "shape": [
 
3310
  768
3311
  ],
3312
  "dtype": "F32",
3313
  "chunks": [
3314
  {
3315
  "offsets": [
 
3316
  0
3317
  ],
3318
  "shape": [
3319
+ 384
 
3320
  ],
3321
  "filename_index": 0
3322
  },
3323
  {
3324
  "offsets": [
3325
+ 384
 
3326
  ],
3327
  "shape": [
3328
+ 384
 
3329
  ],
3330
  "filename_index": 1
3331
  }
3332
  ]
3333
  },
3334
+ "h.2.attn.c_attn.bias": {
3335
  "type": "Distributed",
3336
  "shape": [
3337
+ 2304
 
3338
  ],
3339
  "dtype": "F32",
3340
  "chunks": [
3341
  {
3342
  "offsets": [
 
3343
  0
3344
  ],
3345
  "shape": [
3346
+ 1152
 
3347
  ],
3348
  "filename_index": 0
3349
  },
3350
  {
3351
  "offsets": [
3352
+ 1152
 
3353
  ],
3354
  "shape": [
3355
+ 1152
 
3356
  ],
3357
  "filename_index": 1
3358
  }
3359
  ]
3360
  },
3361
+ "h.11.ln_2.bias": {
3362
  "type": "Distributed",
3363
  "shape": [
 
3364
  768
3365
  ],
3366
  "dtype": "F32",
3367
  "chunks": [
3368
  {
3369
  "offsets": [
 
3370
  0
3371
  ],
3372
  "shape": [
 
3373
  384
3374
  ],
3375
  "filename_index": 0
3376
  },
3377
  {
3378
  "offsets": [
 
3379
  384
3380
  ],
3381
  "shape": [
 
3382
  384
3383
  ],
3384
  "filename_index": 1
3385
  }
3386
  ]
3387
  },
3388
+ "h.4.attn.c_proj.bias": {
3389
  "type": "Distributed",
3390
  "shape": [
3391
  768
 
3412
  }
3413
  ]
3414
  },
3415
+ "h.10.mlp.c_fc.bias": {
3416
  "type": "Distributed",
3417
  "shape": [
3418
+ 3072
 
3419
  ],
3420
  "dtype": "F32",
3421
  "chunks": [
3422
  {
3423
  "offsets": [
 
3424
  0
3425
  ],
3426
  "shape": [
3427
+ 1536
 
3428
  ],
3429
  "filename_index": 0
3430
  },
3431
  {
3432
  "offsets": [
3433
+ 1536
3434
+ ],
3435
+ "shape": [
3436
+ 1536
3437
+ ],
3438
+ "filename_index": 1
3439
+ }
3440
+ ]
3441
+ },
3442
+ "h.11.mlp.c_proj.bias": {
3443
+ "type": "Distributed",
3444
+ "shape": [
3445
+ 768
3446
+ ],
3447
+ "dtype": "F32",
3448
+ "chunks": [
3449
+ {
3450
+ "offsets": [
3451
  0
3452
  ],
3453
  "shape": [
3454
+ 384
3455
+ ],
3456
+ "filename_index": 0
3457
+ },
3458
+ {
3459
+ "offsets": [
3460
+ 384
3461
+ ],
3462
+ "shape": [
3463
+ 384
3464
  ],
3465
  "filename_index": 1
3466
  }
3467
  ]
3468
  },
3469
+ "h.6.attn.bias": {
3470
  "type": "Distributed",
3471
  "shape": [
3472
  1,
 
3508
  }
3509
  ]
3510
  },
3511
+ "h.0.attn.bias": {
3512
  "type": "Distributed",
3513
  "shape": [
3514
+ 1,
3515
+ 1,
3516
+ 1024,
3517
+ 1024
3518
  ],
3519
  "dtype": "F32",
3520
  "chunks": [
3521
  {
3522
  "offsets": [
3523
+ 0,
3524
+ 0,
3525
+ 0,
3526
  0
3527
  ],
3528
  "shape": [
3529
+ 1,
3530
+ 1,
3531
+ 1024,
3532
+ 512
3533
  ],
3534
  "filename_index": 0
3535
  },
3536
  {
3537
  "offsets": [
3538
+ 0,
3539
+ 0,
3540
+ 0,
3541
+ 512
3542
  ],
3543
  "shape": [
3544
+ 1,
3545
+ 1,
3546
+ 1024,
3547
+ 512
3548
  ],
3549
  "filename_index": 1
3550
  }
3551
  ]
3552
  },
3553
+ "h.7.mlp.c_fc.bias": {
3554
  "type": "Distributed",
3555
  "shape": [
3556
+ 3072
3557
  ],
3558
  "dtype": "F32",
3559
  "chunks": [
 
3562
  0
3563
  ],
3564
  "shape": [
3565
+ 1536
3566
  ],
3567
  "filename_index": 0
3568
  },
3569
  {
3570
  "offsets": [
3571
+ 1536
3572
  ],
3573
  "shape": [
3574
+ 1536
3575
  ],
3576
  "filename_index": 1
3577
  }
3578
  ]
3579
  },
3580
+ "h.2.ln_1.weight": {
3581
  "type": "Distributed",
3582
  "shape": [
3583
  768
 
3604
  }
3605
  ]
3606
  },
3607
+ "h.10.attn.c_proj.bias": {
3608
  "type": "Distributed",
3609
  "shape": [
3610
+ 768
 
3611
  ],
3612
  "dtype": "F32",
3613
  "chunks": [
3614
  {
3615
  "offsets": [
 
3616
  0
3617
  ],
3618
  "shape": [
3619
+ 384
 
3620
  ],
3621
  "filename_index": 0
3622
  },
3623
  {
3624
  "offsets": [
3625
+ 384
 
3626
  ],
3627
  "shape": [
3628
+ 384
 
3629
  ],
3630
  "filename_index": 1
3631
  }
3632
  ]
3633
  },
3634
+ "h.1.attn.c_attn.weight": {
3635
  "type": "Distributed",
3636
  "shape": [
3637
+ 768,
3638
+ 2304
3639
  ],
3640
  "dtype": "F32",
3641
  "chunks": [
3642
  {
3643
  "offsets": [
3644
+ 0,
3645
  0
3646
  ],
3647
  "shape": [
3648
+ 768,
3649
+ 1152
3650
  ],
3651
  "filename_index": 0
3652
  },
3653
  {
3654
  "offsets": [
3655
+ 0,
3656
+ 1152
3657
  ],
3658
  "shape": [
3659
+ 768,
3660
+ 1152
3661
  ],
3662
  "filename_index": 1
3663
  }
3664
  ]
3665
  },
3666
+ "h.4.attn.c_attn.weight": {
3667
  "type": "Distributed",
3668
  "shape": [
3669
+ 768,
3670
+ 2304
 
 
3671
  ],
3672
  "dtype": "F32",
3673
  "chunks": [
3674
  {
3675
  "offsets": [
 
 
3676
  0,
3677
  0
3678
  ],
3679
  "shape": [
3680
+ 768,
3681
+ 1152
 
 
3682
  ],
3683
  "filename_index": 0
3684
  },
3685
  {
3686
  "offsets": [
3687
  0,
3688
+ 1152
 
 
3689
  ],
3690
  "shape": [
3691
+ 768,
3692
+ 1152
 
 
3693
  ],
3694
  "filename_index": 1
3695
  }
3696
  ]
3697
  },
3698
+ "h.4.mlp.c_fc.weight": {
3699
  "type": "Distributed",
3700
  "shape": [
3701
  768,
 
3727
  }
3728
  ]
3729
  },
3730
+ "h.9.ln_1.weight": {
3731
  "type": "Distributed",
3732
  "shape": [
 
3733
  768
3734
  ],
3735
  "dtype": "F32",
3736
  "chunks": [
3737
  {
3738
  "offsets": [
 
3739
  0
3740
  ],
3741
  "shape": [
3742
+ 384
 
3743
  ],
3744
  "filename_index": 0
3745
  },
3746
  {
3747
  "offsets": [
3748
+ 384
 
3749
  ],
3750
  "shape": [
3751
+ 384
 
3752
  ],
3753
  "filename_index": 1
3754
  }
3755
  ]
3756
  },
3757
+ "h.5.ln_2.bias": {
3758
  "type": "Distributed",
3759
  "shape": [
3760
  768
 
3781
  }
3782
  ]
3783
  },
3784
+ "h.8.ln_2.weight": {
3785
  "type": "Distributed",
3786
  "shape": [
3787
+ 768
3788
  ],
3789
  "dtype": "F32",
3790
  "chunks": [
 
3793
  0
3794
  ],
3795
  "shape": [
3796
+ 384
3797
  ],
3798
  "filename_index": 0
3799
  },
3800
  {
3801
  "offsets": [
3802
+ 384
3803
  ],
3804
  "shape": [
3805
+ 384
3806
  ],
3807
  "filename_index": 1
3808
  }
3809
  ]
3810
  },
3811
+ "h.2.ln_2.weight": {
3812
  "type": "Distributed",
3813
  "shape": [
3814
  768
 
3835
  }
3836
  ]
3837
  },
3838
+ "h.2.attn.c_proj.weight": {
3839
  "type": "Distributed",
3840
  "shape": [
3841
  768,
3842
+ 768
3843
  ],
3844
  "dtype": "F32",
3845
  "chunks": [
 
3849
  0
3850
  ],
3851
  "shape": [
3852
+ 384,
3853
+ 768
3854
  ],
3855
  "filename_index": 0
3856
  },
3857
  {
3858
  "offsets": [
3859
+ 384,
3860
+ 0
3861
  ],
3862
  "shape": [
3863
+ 384,
3864
+ 768
3865
  ],
3866
  "filename_index": 1
3867
  }
3868
  ]
3869
  },
3870
+ "h.0.attn.c_proj.weight": {
3871
  "type": "Distributed",
3872
  "shape": [
3873
+ 768,
3874
  768
3875
  ],
3876
  "dtype": "F32",
3877
  "chunks": [
3878
  {
3879
  "offsets": [
3880
+ 0,
3881
  0
3882
  ],
3883
  "shape": [
3884
+ 384,
3885
+ 768
3886
  ],
3887
  "filename_index": 0
3888
  },
3889
  {
3890
  "offsets": [
3891
+ 384,
3892
+ 0
3893
  ],
3894
  "shape": [
3895
+ 384,
3896
+ 768
3897
  ],
3898
  "filename_index": 1
3899
  }
3900
  ]
3901
  },
3902
+ "h.8.mlp.c_proj.bias": {
3903
  "type": "Distributed",
3904
  "shape": [
3905
+ 768
3906
  ],
3907
  "dtype": "F32",
3908
  "chunks": [
 
3911
  0
3912
  ],
3913
  "shape": [
3914
+ 384
3915
  ],
3916
  "filename_index": 0
3917
  },
3918
  {
3919
  "offsets": [
3920
+ 384
3921
  ],
3922
  "shape": [
3923
+ 384
3924
  ],
3925
  "filename_index": 1
3926
  }
3927
  ]
3928
  },
3929
+ "h.7.ln_1.bias": {
3930
  "type": "Distributed",
3931
  "shape": [
 
3932
  768
3933
  ],
3934
  "dtype": "F32",
3935
  "chunks": [
3936
  {
3937
  "offsets": [
 
3938
  0
3939
  ],
3940
  "shape": [
3941
+ 384
 
3942
  ],
3943
  "filename_index": 0
3944
  },
3945
  {
3946
  "offsets": [
3947
+ 384
 
3948
  ],
3949
  "shape": [
3950
+ 384
 
3951
  ],
3952
  "filename_index": 1
3953
  }
3954
  ]
3955
  },
3956
+ "h.3.mlp.c_proj.bias": {
3957
  "type": "Distributed",
3958
  "shape": [
 
3959
  768
3960
  ],
3961
  "dtype": "F32",
3962
  "chunks": [
3963
  {
3964
  "offsets": [
 
3965
  0
3966
  ],
3967
  "shape": [
3968
+ 384
 
3969
  ],
3970
  "filename_index": 0
3971
  },
3972
  {
3973
  "offsets": [
3974
+ 384
 
3975
  ],
3976
  "shape": [
3977
+ 384
 
3978
  ],
3979
  "filename_index": 1
3980
  }
3981
  ]
3982
  },
3983
+ "h.0.mlp.c_proj.bias": {
3984
  "type": "Distributed",
3985
  "shape": [
3986
  768
 
4007
  }
4008
  ]
4009
  },
4010
+ "h.8.attn.c_attn.bias": {
4011
  "type": "Distributed",
4012
  "shape": [
4013
+ 2304
 
4014
  ],
4015
  "dtype": "F32",
4016
  "chunks": [
4017
  {
4018
  "offsets": [
 
4019
  0
4020
  ],
4021
  "shape": [
4022
+ 1152
 
4023
  ],
4024
  "filename_index": 0
4025
  },
4026
  {
4027
  "offsets": [
4028
+ 1152
 
4029
  ],
4030
  "shape": [
4031
+ 1152
 
4032
  ],
4033
  "filename_index": 1
4034
  }
4035
  ]
4036
  },
4037
+ "h.3.attn.bias": {
4038
  "type": "Distributed",
4039
  "shape": [
4040
+ 1,
4041
+ 1,
4042
+ 1024,
4043
+ 1024
4044
  ],
4045
  "dtype": "F32",
4046
  "chunks": [
4047
  {
4048
  "offsets": [
4049
+ 0,
4050
+ 0,
4051
+ 0,
4052
  0
4053
  ],
4054
  "shape": [
4055
+ 1,
4056
+ 1,
4057
+ 1024,
4058
+ 512
4059
  ],
4060
  "filename_index": 0
4061
  },
4062
  {
4063
  "offsets": [
4064
+ 0,
4065
+ 0,
4066
+ 0,
4067
+ 512
4068
  ],
4069
  "shape": [
4070
+ 1,
4071
+ 1,
4072
+ 1024,
4073
+ 512
4074
  ],
4075
  "filename_index": 1
4076
  }
4077
  ]
4078
  },
4079
+ "h.6.ln_1.bias": {
4080
  "type": "Distributed",
4081
  "shape": [
4082
  768
 
4103
  }
4104
  ]
4105
  },
4106
+ "h.7.attn.c_attn.weight": {
4107
  "type": "Distributed",
4108
  "shape": [
4109
+ 768,
4110
  2304
4111
  ],
4112
  "dtype": "F32",
4113
  "chunks": [
4114
  {
4115
  "offsets": [
4116
+ 0,
4117
  0
4118
  ],
4119
  "shape": [
4120
+ 768,
4121
  1152
4122
  ],
4123
  "filename_index": 0
4124
  },
4125
  {
4126
  "offsets": [
4127
+ 0,
4128
  1152
4129
  ],
4130
  "shape": [
4131
+ 768,
4132
  1152
4133
  ],
4134
  "filename_index": 1
4135
  }
4136
  ]
4137
  },
4138
+ "h.4.attn.c_proj.weight": {
4139
  "type": "Distributed",
4140
  "shape": [
4141
+ 768,
4142
  768
4143
  ],
4144
  "dtype": "F32",
 
4149
  0
4150
  ],
4151
  "shape": [
4152
+ 384,
4153
  768
4154
  ],
4155
  "filename_index": 0
4156
  },
4157
  {
4158
  "offsets": [
4159
+ 384,
4160
  0
4161
  ],
4162
  "shape": [
4163
+ 384,
4164
  768
4165
  ],
4166
  "filename_index": 1
4167
  }
4168
  ]
4169
  },
4170
+ "h.6.mlp.c_proj.weight": {
4171
  "type": "Distributed",
4172
  "shape": [
4173
+ 3072,
4174
  768
4175
  ],
4176
  "dtype": "F32",
4177
  "chunks": [
4178
  {
4179
  "offsets": [
4180
+ 0,
4181
  0
4182
  ],
4183
  "shape": [
4184
+ 1536,
4185
+ 768
4186
  ],
4187
  "filename_index": 0
4188
  },
4189
  {
4190
  "offsets": [
4191
+ 1536,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4192
  0
4193
  ],
4194
  "shape": [
4195
+ 1536,
4196
+ 768
 
 
 
 
 
 
 
 
4197
  ],
4198
  "filename_index": 1
4199
  }
4200
  ]
4201
  },
4202
+ "h.8.attn.bias": {
4203
  "type": "Distributed",
4204
  "shape": [
4205
  1,
 
4241
  }
4242
  ]
4243
  },
4244
+ "h.4.mlp.c_fc.bias": {
4245
  "type": "Distributed",
4246
  "shape": [
4247
+ 3072
4248
  ],
4249
  "dtype": "F32",
4250
  "chunks": [
 
4253
  0
4254
  ],
4255
  "shape": [
4256
+ 1536
4257
  ],
4258
  "filename_index": 0
4259
  },
4260
  {
4261
  "offsets": [
4262
+ 1536
4263
  ],
4264
  "shape": [
4265
+ 1536
4266
  ],
4267
  "filename_index": 1
4268
  }
4269
  ]
4270
  },
4271
+ "h.3.attn.c_attn.bias": {
4272
  "type": "Distributed",
4273
  "shape": [
4274
+ 2304
4275
  ],
4276
  "dtype": "F32",
4277
  "chunks": [
 
4280
  0
4281
  ],
4282
  "shape": [
4283
+ 1152
4284
  ],
4285
  "filename_index": 0
4286
  },
4287
  {
4288
  "offsets": [
4289
+ 1152
4290
  ],
4291
  "shape": [
4292
+ 1152
4293
  ],
4294
  "filename_index": 1
4295
  }
4296
  ]
4297
  },
4298
+ "h.5.mlp.c_proj.weight": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
+ 3072,
4302
  768
4303
  ],
4304
  "dtype": "F32",
4305
  "chunks": [
4306
  {
4307
  "offsets": [
4308
+ 0,
4309
  0
4310
  ],
4311
  "shape": [
4312
+ 1536,
4313
+ 768
4314
  ],
4315
  "filename_index": 0
4316
  },
4317
  {
4318
  "offsets": [
4319
+ 1536,
4320
+ 0
4321
  ],
4322
  "shape": [
4323
+ 1536,
4324
+ 768
4325
  ],
4326
  "filename_index": 1
4327
  }
4328
  ]
4329
  },
4330
+ "h.11.mlp.c_fc.weight": {
4331
  "type": "Distributed",
4332
  "shape": [
4333
+ 768,
4334
  3072
4335
  ],
4336
  "dtype": "F32",
4337
  "chunks": [
4338
  {
4339
  "offsets": [
4340
+ 0,
4341
  0
4342
  ],
4343
  "shape": [
4344
+ 768,
4345
  1536
4346
  ],
4347
  "filename_index": 0
4348
  },
4349
  {
4350
  "offsets": [
4351
+ 0,
4352
  1536
4353
  ],
4354
  "shape": [
4355
+ 768,
4356
  1536
4357
  ],
4358
  "filename_index": 1
4359
  }
4360
  ]
4361
  },
4362
+ "h.10.ln_2.weight": {
4363
  "type": "Distributed",
4364
  "shape": [
4365
  768
 
4386
  }
4387
  ]
4388
  },
4389
+ "h.2.mlp.c_fc.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4390
  "type": "Distributed",
4391
  "shape": [
 
4392
  3072
4393
  ],
4394
  "dtype": "F32",
4395
  "chunks": [
4396
  {
4397
  "offsets": [
 
4398
  0
4399
  ],
4400
  "shape": [
 
4401
  1536
4402
  ],
4403
  "filename_index": 0
4404
  },
4405
  {
4406
  "offsets": [
 
4407
  1536
4408
  ],
4409
  "shape": [
 
4410
  1536
4411
  ],
4412
  "filename_index": 1
4413
  }
4414
  ]
4415
  },
4416
+ "h.2.ln_1.bias": {
4417
  "type": "Distributed",
4418
  "shape": [
4419
  768
 
4440
  }
4441
  ]
4442
  },
4443
+ "ln_f.bias": {
4444
  "type": "Distributed",
4445
  "shape": [
4446
+ 768
 
 
 
4447
  ],
4448
  "dtype": "F32",
4449
  "chunks": [
4450
  {
4451
  "offsets": [
 
 
 
4452
  0
4453
  ],
4454
  "shape": [
4455
+ 384
 
 
 
4456
  ],
4457
  "filename_index": 0
4458
  },
4459
  {
4460
  "offsets": [
4461
+ 384
 
 
 
4462
  ],
4463
  "shape": [
4464
+ 384
 
 
 
4465
  ],
4466
  "filename_index": 1
4467
  }
4468
  ]
4469
  },
4470
+ "h.9.mlp.c_proj.bias": {
4471
  "type": "Distributed",
4472
  "shape": [
4473
  768
 
4494
  }
4495
  ]
4496
  },
4497
+ "h.3.mlp.c_fc.weight": {
4498
  "type": "Distributed",
4499
  "shape": [
4500
+ 768,
4501
+ 3072
4502
  ],
4503
  "dtype": "F32",
4504
  "chunks": [
4505
  {
4506
  "offsets": [
4507
+ 0,
4508
  0
4509
  ],
4510
  "shape": [
4511
+ 768,
4512
+ 1536
4513
  ],
4514
  "filename_index": 0
4515
  },
4516
  {
4517
  "offsets": [
4518
+ 0,
4519
+ 1536
4520
  ],
4521
  "shape": [
4522
+ 768,
4523
+ 1536
4524
  ],
4525
  "filename_index": 1
4526
  }
4527
  ]
4528
  },
4529
+ "h.11.attn.c_proj.weight": {
4530
  "type": "Distributed",
4531
  "shape": [
4532
  768,
4533
+ 768
4534
  ],
4535
  "dtype": "F32",
4536
  "chunks": [
 
4540
  0
4541
  ],
4542
  "shape": [
4543
+ 384,
4544
+ 768
4545
  ],
4546
  "filename_index": 0
4547
  },
4548
  {
4549
  "offsets": [
4550
+ 384,
4551
+ 0
4552
  ],
4553
  "shape": [
4554
+ 384,
4555
+ 768
4556
  ],
4557
  "filename_index": 1
4558
  }
4559
  ]
4560
  },
4561
+ "h.8.mlp.c_fc.bias": {
4562
  "type": "Distributed",
4563
  "shape": [
4564
  3072
 
4585
  }
4586
  ]
4587
  },
4588
+ "h.4.mlp.c_proj.weight": {
4589
  "type": "Distributed",
4590
  "shape": [
4591
+ 3072,
4592
  768
4593
  ],
4594
  "dtype": "F32",
 
4599
  0
4600
  ],
4601
  "shape": [
4602
+ 1536,
4603
  768
4604
  ],
4605
  "filename_index": 0
4606
  },
4607
  {
4608
  "offsets": [
4609
+ 1536,
4610
  0
4611
  ],
4612
  "shape": [
4613
+ 1536,
4614
  768
4615
  ],
4616
  "filename_index": 1
4617
  }
4618
  ]
4619
  },
4620
+ "h.10.attn.c_attn.weight": {
4621
  "type": "Distributed",
4622
  "shape": [
4623
+ 768,
4624
+ 2304
4625
  ],
4626
  "dtype": "F32",
4627
  "chunks": [
4628
  {
4629
  "offsets": [
4630
+ 0,
4631
  0
4632
  ],
4633
  "shape": [
4634
+ 768,
4635
+ 1152
4636
  ],
4637
  "filename_index": 0
4638
  },
4639
  {
4640
  "offsets": [
4641
+ 0,
4642
+ 1152
4643
  ],
4644
  "shape": [
4645
+ 768,
4646
+ 1152
4647
  ],
4648
  "filename_index": 1
4649
  }
4650
  ]
4651
  },
4652
+ "h.5.attn.c_attn.weight": {
4653
  "type": "Distributed",
4654
  "shape": [
4655
+ 768,
4656
+ 2304
4657
  ],
4658
  "dtype": "F32",
4659
  "chunks": [
4660
  {
4661
  "offsets": [
4662
+ 0,
4663
  0
4664
  ],
4665
  "shape": [
4666
+ 768,
4667
+ 1152
4668
  ],
4669
  "filename_index": 0
4670
  },
4671
  {
4672
  "offsets": [
4673
+ 0,
4674
+ 1152
4675
  ],
4676
  "shape": [
4677
+ 768,
4678
+ 1152
4679
  ],
4680
  "filename_index": 1
4681
  }
4682
  ]
4683
  },
4684
+ "h.9.attn.bias": {
4685
  "type": "Distributed",
4686
  "shape": [
4687
+ 1,
4688
+ 1,
4689
+ 1024,
4690
+ 1024
4691
  ],
4692
  "dtype": "F32",
4693
  "chunks": [
4694
  {
4695
  "offsets": [
4696
+ 0,
4697
+ 0,
4698
  0,
4699
  0
4700
  ],
4701
  "shape": [
4702
+ 1,
4703
+ 1,
4704
+ 1024,
4705
+ 512
4706
  ],
4707
  "filename_index": 0
4708
  },
4709
  {
4710
  "offsets": [
4711
+ 0,
4712
+ 0,
4713
+ 0,
4714
+ 512
4715
  ],
4716
  "shape": [
4717
+ 1,
4718
+ 1,
4719
+ 1024,
4720
+ 512
4721
  ],
4722
  "filename_index": 1
4723
  }
4724
  ]
4725
  },
4726
+ "h.8.attn.c_proj.bias": {
4727
  "type": "Distributed",
4728
  "shape": [
4729
  768