Narsil HF Staff commited on
Commit
b10e2c8
·
verified ·
1 Parent(s): 16043c2

Upload topology.json

Browse files
Files changed (1) hide show
  1. topology.json +931 -931
topology.json CHANGED
@@ -1,38 +1,33 @@
1
  {
2
  "tensors": {
3
- "h.0.mlp.c_proj.weight": {
4
  "type": "Distributed",
5
  "shape": [
6
- 3072,
7
  768
8
  ],
9
  "dtype": "F32",
10
  "chunks": [
11
  {
12
  "offsets": [
13
- 0,
14
  0
15
  ],
16
  "shape": [
17
- 1536,
18
- 768
19
  ],
20
  "filename_index": 0
21
  },
22
  {
23
  "offsets": [
24
- 1536,
25
- 0
26
  ],
27
  "shape": [
28
- 1536,
29
- 768
30
  ],
31
  "filename_index": 1
32
  }
33
  ]
34
  },
35
- "h.9.mlp.c_fc.weight": {
36
  "type": "Distributed",
37
  "shape": [
38
  768,
@@ -64,37 +59,52 @@
64
  }
65
  ]
66
  },
67
- "h.2.mlp.c_fc.bias": {
68
  "type": "Distributed",
69
  "shape": [
70
- 3072
 
 
 
71
  ],
72
  "dtype": "F32",
73
  "chunks": [
74
  {
75
  "offsets": [
 
 
 
76
  0
77
  ],
78
  "shape": [
79
- 1536
 
 
 
80
  ],
81
  "filename_index": 0
82
  },
83
  {
84
  "offsets": [
85
- 1536
 
 
 
86
  ],
87
  "shape": [
88
- 1536
 
 
 
89
  ],
90
  "filename_index": 1
91
  }
92
  ]
93
  },
94
- "h.8.ln_1.weight": {
95
  "type": "Distributed",
96
  "shape": [
97
- 768
98
  ],
99
  "dtype": "F32",
100
  "chunks": [
@@ -103,22 +113,22 @@
103
  0
104
  ],
105
  "shape": [
106
- 384
107
  ],
108
  "filename_index": 0
109
  },
110
  {
111
  "offsets": [
112
- 384
113
  ],
114
  "shape": [
115
- 384
116
  ],
117
  "filename_index": 1
118
  }
119
  ]
120
  },
121
- "h.2.attn.c_attn.weight": {
122
  "type": "Distributed",
123
  "shape": [
124
  768,
@@ -150,7 +160,7 @@
150
  }
151
  ]
152
  },
153
- "h.8.attn.c_proj.bias": {
154
  "type": "Distributed",
155
  "shape": [
156
  768
@@ -177,91 +187,106 @@
177
  }
178
  ]
179
  },
180
- "ln_f.bias": {
181
  "type": "Distributed",
182
  "shape": [
 
183
  768
184
  ],
185
  "dtype": "F32",
186
  "chunks": [
187
  {
188
  "offsets": [
 
189
  0
190
  ],
191
  "shape": [
192
- 384
 
193
  ],
194
  "filename_index": 0
195
  },
196
  {
197
  "offsets": [
198
- 384
 
199
  ],
200
  "shape": [
201
- 384
 
202
  ],
203
  "filename_index": 1
204
  }
205
  ]
206
  },
207
- "h.2.ln_2.weight": {
208
  "type": "Distributed",
209
  "shape": [
210
- 768
 
211
  ],
212
  "dtype": "F32",
213
  "chunks": [
214
  {
215
  "offsets": [
 
216
  0
217
  ],
218
  "shape": [
219
- 384
 
220
  ],
221
  "filename_index": 0
222
  },
223
  {
224
  "offsets": [
225
- 384
 
226
  ],
227
  "shape": [
228
- 384
 
229
  ],
230
  "filename_index": 1
231
  }
232
  ]
233
  },
234
- "h.6.ln_1.bias": {
235
  "type": "Distributed",
236
  "shape": [
 
237
  768
238
  ],
239
  "dtype": "F32",
240
  "chunks": [
241
  {
242
  "offsets": [
 
243
  0
244
  ],
245
  "shape": [
 
246
  384
247
  ],
248
  "filename_index": 0
249
  },
250
  {
251
  "offsets": [
 
252
  384
253
  ],
254
  "shape": [
 
255
  384
256
  ],
257
  "filename_index": 1
258
  }
259
  ]
260
  },
261
- "h.2.mlp.c_proj.weight": {
262
  "type": "Distributed",
263
  "shape": [
264
- 3072,
265
  768
266
  ],
267
  "dtype": "F32",
@@ -272,57 +297,25 @@
272
  0
273
  ],
274
  "shape": [
275
- 1536,
276
  768
277
  ],
278
  "filename_index": 0
279
  },
280
  {
281
  "offsets": [
282
- 1536,
283
  0
284
  ],
285
  "shape": [
286
- 1536,
287
  768
288
  ],
289
  "filename_index": 1
290
  }
291
  ]
292
  },
293
- "h.10.mlp.c_fc.weight": {
294
- "type": "Distributed",
295
- "shape": [
296
- 768,
297
- 3072
298
- ],
299
- "dtype": "F32",
300
- "chunks": [
301
- {
302
- "offsets": [
303
- 0,
304
- 0
305
- ],
306
- "shape": [
307
- 768,
308
- 1536
309
- ],
310
- "filename_index": 0
311
- },
312
- {
313
- "offsets": [
314
- 0,
315
- 1536
316
- ],
317
- "shape": [
318
- 768,
319
- 1536
320
- ],
321
- "filename_index": 1
322
- }
323
- ]
324
- },
325
- "h.8.ln_1.bias": {
326
  "type": "Distributed",
327
  "shape": [
328
  768
@@ -349,10 +342,10 @@
349
  }
350
  ]
351
  },
352
- "h.9.attn.c_attn.bias": {
353
  "type": "Distributed",
354
  "shape": [
355
- 2304
356
  ],
357
  "dtype": "F32",
358
  "chunks": [
@@ -361,52 +354,57 @@
361
  0
362
  ],
363
  "shape": [
364
- 1152
365
  ],
366
  "filename_index": 0
367
  },
368
  {
369
  "offsets": [
370
- 1152
371
  ],
372
  "shape": [
373
- 1152
374
  ],
375
  "filename_index": 1
376
  }
377
  ]
378
  },
379
- "h.5.ln_2.bias": {
380
  "type": "Distributed",
381
  "shape": [
 
382
  768
383
  ],
384
  "dtype": "F32",
385
  "chunks": [
386
  {
387
  "offsets": [
 
388
  0
389
  ],
390
  "shape": [
391
- 384
 
392
  ],
393
  "filename_index": 0
394
  },
395
  {
396
  "offsets": [
397
- 384
 
398
  ],
399
  "shape": [
400
- 384
 
401
  ],
402
  "filename_index": 1
403
  }
404
  ]
405
  },
406
- "h.6.mlp.c_proj.weight": {
407
  "type": "Distributed",
408
  "shape": [
409
- 3072,
410
  768
411
  ],
412
  "dtype": "F32",
@@ -417,82 +415,92 @@
417
  0
418
  ],
419
  "shape": [
420
- 1536,
421
  768
422
  ],
423
  "filename_index": 0
424
  },
425
  {
426
  "offsets": [
427
- 1536,
428
  0
429
  ],
430
  "shape": [
431
- 1536,
432
  768
433
  ],
434
  "filename_index": 1
435
  }
436
  ]
437
  },
438
- "h.7.mlp.c_proj.bias": {
439
  "type": "Distributed",
440
  "shape": [
441
- 768
 
442
  ],
443
  "dtype": "F32",
444
  "chunks": [
445
  {
446
  "offsets": [
 
447
  0
448
  ],
449
  "shape": [
450
- 384
 
451
  ],
452
  "filename_index": 0
453
  },
454
  {
455
  "offsets": [
456
- 384
 
457
  ],
458
  "shape": [
459
- 384
 
460
  ],
461
  "filename_index": 1
462
  }
463
  ]
464
  },
465
- "h.1.mlp.c_fc.bias": {
466
  "type": "Distributed",
467
  "shape": [
468
- 3072
 
469
  ],
470
  "dtype": "F32",
471
  "chunks": [
472
  {
473
  "offsets": [
 
474
  0
475
  ],
476
  "shape": [
477
- 1536
 
478
  ],
479
  "filename_index": 0
480
  },
481
  {
482
  "offsets": [
483
- 1536
 
484
  ],
485
  "shape": [
486
- 1536
 
487
  ],
488
  "filename_index": 1
489
  }
490
  ]
491
  },
492
- "h.9.mlp.c_fc.bias": {
493
  "type": "Distributed",
494
  "shape": [
495
- 3072
496
  ],
497
  "dtype": "F32",
498
  "chunks": [
@@ -501,81 +509,91 @@
501
  0
502
  ],
503
  "shape": [
504
- 1536
505
  ],
506
  "filename_index": 0
507
  },
508
  {
509
  "offsets": [
510
- 1536
511
  ],
512
  "shape": [
513
- 1536
514
  ],
515
  "filename_index": 1
516
  }
517
  ]
518
  },
519
- "h.5.mlp.c_fc.weight": {
520
  "type": "Distributed",
521
  "shape": [
522
- 768,
523
- 3072
524
  ],
525
  "dtype": "F32",
526
  "chunks": [
527
  {
528
  "offsets": [
529
- 0,
530
  0
531
  ],
532
  "shape": [
533
- 768,
534
- 1536
535
  ],
536
  "filename_index": 0
537
  },
538
  {
539
  "offsets": [
540
- 0,
541
- 1536
542
  ],
543
  "shape": [
544
- 768,
545
- 1536
546
  ],
547
  "filename_index": 1
548
  }
549
  ]
550
  },
551
- "h.7.mlp.c_fc.bias": {
552
  "type": "Distributed",
553
  "shape": [
554
- 3072
 
 
 
555
  ],
556
  "dtype": "F32",
557
  "chunks": [
558
  {
559
  "offsets": [
 
 
 
560
  0
561
  ],
562
  "shape": [
563
- 1536
 
 
 
564
  ],
565
  "filename_index": 0
566
  },
567
  {
568
  "offsets": [
569
- 1536
 
 
 
570
  ],
571
  "shape": [
572
- 1536
 
 
 
573
  ],
574
  "filename_index": 1
575
  }
576
  ]
577
  },
578
- "h.4.attn.c_proj.bias": {
579
  "type": "Distributed",
580
  "shape": [
581
  768
@@ -602,103 +620,76 @@
602
  }
603
  ]
604
  },
605
- "h.1.attn.c_proj.weight": {
606
  "type": "Distributed",
607
  "shape": [
608
- 768,
609
- 768
 
 
610
  ],
611
  "dtype": "F32",
612
  "chunks": [
613
  {
614
  "offsets": [
615
  0,
616
- 0
617
- ],
618
- "shape": [
619
- 384,
620
- 768
621
- ],
622
- "filename_index": 0
623
- },
624
- {
625
- "offsets": [
626
- 384,
627
- 0
628
- ],
629
- "shape": [
630
- 384,
631
- 768
632
- ],
633
- "filename_index": 1
634
- }
635
- ]
636
- },
637
- "h.0.mlp.c_fc.weight": {
638
- "type": "Distributed",
639
- "shape": [
640
- 768,
641
- 3072
642
- ],
643
- "dtype": "F32",
644
- "chunks": [
645
- {
646
- "offsets": [
647
  0,
648
  0
649
  ],
650
  "shape": [
651
- 768,
652
- 1536
 
 
653
  ],
654
  "filename_index": 0
655
  },
656
  {
657
  "offsets": [
658
  0,
659
- 1536
 
 
660
  ],
661
  "shape": [
662
- 768,
663
- 1536
 
 
664
  ],
665
  "filename_index": 1
666
  }
667
  ]
668
  },
669
- "h.8.mlp.c_fc.weight": {
670
  "type": "Distributed",
671
  "shape": [
672
- 768,
673
- 3072
674
  ],
675
  "dtype": "F32",
676
  "chunks": [
677
  {
678
  "offsets": [
679
- 0,
680
  0
681
  ],
682
  "shape": [
683
- 768,
684
- 1536
685
  ],
686
  "filename_index": 0
687
  },
688
  {
689
  "offsets": [
690
- 0,
691
- 1536
692
  ],
693
  "shape": [
694
- 768,
695
- 1536
696
  ],
697
  "filename_index": 1
698
  }
699
  ]
700
  },
701
- "h.5.ln_2.weight": {
702
  "type": "Distributed",
703
  "shape": [
704
  768
@@ -725,11 +716,11 @@
725
  }
726
  ]
727
  },
728
- "h.11.mlp.c_fc.weight": {
729
  "type": "Distributed",
730
  "shape": [
731
- 768,
732
- 3072
733
  ],
734
  "dtype": "F32",
735
  "chunks": [
@@ -739,25 +730,25 @@
739
  0
740
  ],
741
  "shape": [
742
- 768,
743
- 1536
744
  ],
745
  "filename_index": 0
746
  },
747
  {
748
  "offsets": [
749
- 0,
750
- 1536
751
  ],
752
  "shape": [
753
- 768,
754
- 1536
755
  ],
756
  "filename_index": 1
757
  }
758
  ]
759
  },
760
- "h.9.mlp.c_proj.bias": {
761
  "type": "Distributed",
762
  "shape": [
763
  768
@@ -784,66 +775,81 @@
784
  }
785
  ]
786
  },
787
- "h.6.mlp.c_fc.bias": {
788
  "type": "Distributed",
789
  "shape": [
790
- 3072
 
791
  ],
792
  "dtype": "F32",
793
  "chunks": [
794
  {
795
  "offsets": [
 
796
  0
797
  ],
798
  "shape": [
799
- 1536
 
800
  ],
801
  "filename_index": 0
802
  },
803
  {
804
  "offsets": [
805
- 1536
 
806
  ],
807
  "shape": [
808
- 1536
 
809
  ],
810
  "filename_index": 1
811
  }
812
  ]
813
  },
814
- "h.5.mlp.c_proj.weight": {
815
  "type": "Distributed",
816
  "shape": [
817
- 3072,
818
- 768
 
 
819
  ],
820
  "dtype": "F32",
821
  "chunks": [
822
  {
823
  "offsets": [
 
 
824
  0,
825
  0
826
  ],
827
  "shape": [
828
- 1536,
829
- 768
 
 
830
  ],
831
  "filename_index": 0
832
  },
833
  {
834
  "offsets": [
835
- 1536,
836
- 0
 
 
837
  ],
838
  "shape": [
839
- 1536,
840
- 768
 
 
841
  ],
842
  "filename_index": 1
843
  }
844
  ]
845
  },
846
- "h.7.attn.c_proj.bias": {
847
  "type": "Distributed",
848
  "shape": [
849
  768
@@ -870,11 +876,11 @@
870
  }
871
  ]
872
  },
873
- "h.6.attn.c_proj.weight": {
874
  "type": "Distributed",
875
  "shape": [
876
  768,
877
- 768
878
  ],
879
  "dtype": "F32",
880
  "chunks": [
@@ -884,60 +890,55 @@
884
  0
885
  ],
886
  "shape": [
887
- 384,
888
- 768
889
  ],
890
  "filename_index": 0
891
  },
892
  {
893
  "offsets": [
894
- 384,
895
- 0
896
  ],
897
  "shape": [
898
- 384,
899
- 768
900
  ],
901
  "filename_index": 1
902
  }
903
  ]
904
  },
905
- "wpe.weight": {
906
  "type": "Distributed",
907
  "shape": [
908
- 1024,
909
  768
910
  ],
911
  "dtype": "F32",
912
  "chunks": [
913
  {
914
  "offsets": [
915
- 0,
916
  0
917
  ],
918
  "shape": [
919
- 1024,
920
  384
921
  ],
922
  "filename_index": 0
923
  },
924
  {
925
  "offsets": [
926
- 0,
927
  384
928
  ],
929
  "shape": [
930
- 1024,
931
  384
932
  ],
933
  "filename_index": 1
934
  }
935
  ]
936
  },
937
- "h.1.attn.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
- 768
941
  ],
942
  "dtype": "F32",
943
  "chunks": [
@@ -946,22 +947,22 @@
946
  0
947
  ],
948
  "shape": [
949
- 384
950
  ],
951
  "filename_index": 0
952
  },
953
  {
954
  "offsets": [
955
- 384
956
  ],
957
  "shape": [
958
- 384
959
  ],
960
  "filename_index": 1
961
  }
962
  ]
963
  },
964
- "h.3.mlp.c_proj.weight": {
965
  "type": "Distributed",
966
  "shape": [
967
  3072,
@@ -993,81 +994,61 @@
993
  }
994
  ]
995
  },
996
- "h.2.mlp.c_fc.weight": {
997
  "type": "Distributed",
998
  "shape": [
999
- 768,
1000
- 3072
1001
  ],
1002
  "dtype": "F32",
1003
  "chunks": [
1004
  {
1005
  "offsets": [
1006
- 0,
1007
  0
1008
  ],
1009
  "shape": [
1010
- 768,
1011
- 1536
1012
  ],
1013
  "filename_index": 0
1014
  },
1015
  {
1016
  "offsets": [
1017
- 0,
1018
- 1536
1019
  ],
1020
  "shape": [
1021
- 768,
1022
- 1536
1023
  ],
1024
  "filename_index": 1
1025
  }
1026
  ]
1027
  },
1028
- "h.1.attn.bias": {
1029
  "type": "Distributed",
1030
  "shape": [
1031
- 1,
1032
- 1,
1033
- 1024,
1034
- 1024
1035
  ],
1036
  "dtype": "F32",
1037
  "chunks": [
1038
  {
1039
  "offsets": [
1040
- 0,
1041
- 0,
1042
- 0,
1043
  0
1044
  ],
1045
  "shape": [
1046
- 1,
1047
- 1,
1048
- 1024,
1049
- 512
1050
  ],
1051
  "filename_index": 0
1052
  },
1053
  {
1054
  "offsets": [
1055
- 0,
1056
- 0,
1057
- 0,
1058
- 512
1059
  ],
1060
  "shape": [
1061
- 1,
1062
- 1,
1063
- 1024,
1064
- 512
1065
  ],
1066
  "filename_index": 1
1067
  }
1068
  ]
1069
  },
1070
- "h.4.ln_1.bias": {
1071
  "type": "Distributed",
1072
  "shape": [
1073
  768
@@ -1094,7 +1075,7 @@
1094
  }
1095
  ]
1096
  },
1097
- "h.0.ln_1.weight": {
1098
  "type": "Distributed",
1099
  "shape": [
1100
  768
@@ -1121,10 +1102,10 @@
1121
  }
1122
  ]
1123
  },
1124
- "h.2.mlp.c_proj.bias": {
1125
  "type": "Distributed",
1126
  "shape": [
1127
- 768
1128
  ],
1129
  "dtype": "F32",
1130
  "chunks": [
@@ -1133,22 +1114,22 @@
1133
  0
1134
  ],
1135
  "shape": [
1136
- 384
1137
  ],
1138
  "filename_index": 0
1139
  },
1140
  {
1141
  "offsets": [
1142
- 384
1143
  ],
1144
  "shape": [
1145
- 384
1146
  ],
1147
  "filename_index": 1
1148
  }
1149
  ]
1150
  },
1151
- "h.7.attn.c_proj.weight": {
1152
  "type": "Distributed",
1153
  "shape": [
1154
  768,
@@ -1180,69 +1161,69 @@
1180
  }
1181
  ]
1182
  },
1183
- "h.11.ln_2.bias": {
1184
  "type": "Distributed",
1185
  "shape": [
1186
- 768
 
1187
  ],
1188
  "dtype": "F32",
1189
  "chunks": [
1190
  {
1191
  "offsets": [
 
1192
  0
1193
  ],
1194
  "shape": [
1195
- 384
 
1196
  ],
1197
  "filename_index": 0
1198
  },
1199
  {
1200
  "offsets": [
1201
- 384
 
1202
  ],
1203
  "shape": [
1204
- 384
 
1205
  ],
1206
  "filename_index": 1
1207
  }
1208
  ]
1209
  },
1210
- "h.9.attn.c_proj.weight": {
1211
  "type": "Distributed",
1212
  "shape": [
1213
- 768,
1214
  768
1215
  ],
1216
  "dtype": "F32",
1217
  "chunks": [
1218
  {
1219
  "offsets": [
1220
- 0,
1221
  0
1222
  ],
1223
  "shape": [
1224
- 384,
1225
- 768
1226
  ],
1227
  "filename_index": 0
1228
  },
1229
  {
1230
  "offsets": [
1231
- 384,
1232
- 0
1233
  ],
1234
  "shape": [
1235
- 384,
1236
- 768
1237
  ],
1238
  "filename_index": 1
1239
  }
1240
  ]
1241
  },
1242
- "h.11.attn.c_proj.bias": {
1243
  "type": "Distributed",
1244
  "shape": [
1245
- 768
1246
  ],
1247
  "dtype": "F32",
1248
  "chunks": [
@@ -1251,118 +1232,103 @@
1251
  0
1252
  ],
1253
  "shape": [
1254
- 384
1255
  ],
1256
  "filename_index": 0
1257
  },
1258
  {
1259
  "offsets": [
1260
- 384
1261
  ],
1262
  "shape": [
1263
- 384
1264
  ],
1265
  "filename_index": 1
1266
  }
1267
  ]
1268
  },
1269
- "h.1.mlp.c_fc.weight": {
1270
  "type": "Distributed",
1271
  "shape": [
1272
- 768,
1273
- 3072
1274
  ],
1275
  "dtype": "F32",
1276
  "chunks": [
1277
  {
1278
  "offsets": [
1279
- 0,
1280
  0
1281
  ],
1282
  "shape": [
1283
- 768,
1284
- 1536
1285
  ],
1286
  "filename_index": 0
1287
  },
1288
  {
1289
  "offsets": [
1290
- 0,
1291
- 1536
1292
  ],
1293
  "shape": [
1294
- 768,
1295
- 1536
1296
  ],
1297
  "filename_index": 1
1298
  }
1299
  ]
1300
  },
1301
- "h.11.attn.c_attn.weight": {
1302
  "type": "Distributed",
1303
  "shape": [
1304
- 768,
1305
- 2304
1306
  ],
1307
  "dtype": "F32",
1308
  "chunks": [
1309
  {
1310
  "offsets": [
1311
- 0,
1312
  0
1313
  ],
1314
  "shape": [
1315
- 768,
1316
- 1152
1317
  ],
1318
  "filename_index": 0
1319
  },
1320
  {
1321
  "offsets": [
1322
- 0,
1323
- 1152
1324
  ],
1325
  "shape": [
1326
- 768,
1327
- 1152
1328
  ],
1329
  "filename_index": 1
1330
  }
1331
  ]
1332
  },
1333
- "h.3.mlp.c_fc.weight": {
1334
  "type": "Distributed",
1335
  "shape": [
1336
- 768,
1337
- 3072
1338
  ],
1339
  "dtype": "F32",
1340
  "chunks": [
1341
  {
1342
  "offsets": [
1343
- 0,
1344
  0
1345
  ],
1346
  "shape": [
1347
- 768,
1348
- 1536
1349
  ],
1350
  "filename_index": 0
1351
  },
1352
  {
1353
  "offsets": [
1354
- 0,
1355
- 1536
1356
  ],
1357
  "shape": [
1358
- 768,
1359
- 1536
1360
  ],
1361
  "filename_index": 1
1362
  }
1363
  ]
1364
  },
1365
- "h.9.ln_1.weight": {
1366
  "type": "Distributed",
1367
  "shape": [
1368
  768
@@ -1389,7 +1355,7 @@
1389
  }
1390
  ]
1391
  },
1392
- "h.5.ln_1.weight": {
1393
  "type": "Distributed",
1394
  "shape": [
1395
  768
@@ -1416,10 +1382,10 @@
1416
  }
1417
  ]
1418
  },
1419
- "h.4.attn.c_attn.bias": {
1420
  "type": "Distributed",
1421
  "shape": [
1422
- 2304
1423
  ],
1424
  "dtype": "F32",
1425
  "chunks": [
@@ -1428,22 +1394,22 @@
1428
  0
1429
  ],
1430
  "shape": [
1431
- 1152
1432
  ],
1433
  "filename_index": 0
1434
  },
1435
  {
1436
  "offsets": [
1437
- 1152
1438
  ],
1439
  "shape": [
1440
- 1152
1441
  ],
1442
  "filename_index": 1
1443
  }
1444
  ]
1445
  },
1446
- "h.10.attn.c_attn.bias": {
1447
  "type": "Distributed",
1448
  "shape": [
1449
  2304
@@ -1470,71 +1436,61 @@
1470
  }
1471
  ]
1472
  },
1473
- "wte.weight": {
1474
  "type": "Distributed",
1475
  "shape": [
1476
- 50257,
1477
  768
1478
  ],
1479
  "dtype": "F32",
1480
  "chunks": [
1481
  {
1482
  "offsets": [
1483
- 0,
1484
  0
1485
  ],
1486
  "shape": [
1487
- 50257,
1488
  384
1489
  ],
1490
  "filename_index": 0
1491
  },
1492
  {
1493
  "offsets": [
1494
- 0,
1495
  384
1496
  ],
1497
  "shape": [
1498
- 50257,
1499
  384
1500
  ],
1501
  "filename_index": 1
1502
  }
1503
  ]
1504
  },
1505
- "h.4.mlp.c_fc.weight": {
1506
  "type": "Distributed",
1507
  "shape": [
1508
- 768,
1509
  3072
1510
  ],
1511
  "dtype": "F32",
1512
  "chunks": [
1513
  {
1514
  "offsets": [
1515
- 0,
1516
  0
1517
  ],
1518
  "shape": [
1519
- 768,
1520
  1536
1521
  ],
1522
  "filename_index": 0
1523
  },
1524
  {
1525
  "offsets": [
1526
- 0,
1527
  1536
1528
  ],
1529
  "shape": [
1530
- 768,
1531
  1536
1532
  ],
1533
  "filename_index": 1
1534
  }
1535
  ]
1536
  },
1537
- "h.0.mlp.c_proj.bias": {
1538
  "type": "Distributed",
1539
  "shape": [
1540
  768
@@ -1561,10 +1517,10 @@
1561
  }
1562
  ]
1563
  },
1564
- "h.10.mlp.c_fc.bias": {
1565
  "type": "Distributed",
1566
  "shape": [
1567
- 3072
1568
  ],
1569
  "dtype": "F32",
1570
  "chunks": [
@@ -1573,22 +1529,22 @@
1573
  0
1574
  ],
1575
  "shape": [
1576
- 1536
1577
  ],
1578
  "filename_index": 0
1579
  },
1580
  {
1581
  "offsets": [
1582
- 1536
1583
  ],
1584
  "shape": [
1585
- 1536
1586
  ],
1587
  "filename_index": 1
1588
  }
1589
  ]
1590
  },
1591
- "ln_f.weight": {
1592
  "type": "Distributed",
1593
  "shape": [
1594
  768
@@ -1615,7 +1571,7 @@
1615
  }
1616
  ]
1617
  },
1618
- "h.2.ln_2.bias": {
1619
  "type": "Distributed",
1620
  "shape": [
1621
  768
@@ -1642,69 +1598,69 @@
1642
  }
1643
  ]
1644
  },
1645
- "h.3.attn.c_attn.bias": {
1646
  "type": "Distributed",
1647
  "shape": [
1648
- 2304
 
1649
  ],
1650
  "dtype": "F32",
1651
  "chunks": [
1652
  {
1653
  "offsets": [
 
1654
  0
1655
  ],
1656
  "shape": [
1657
- 1152
 
1658
  ],
1659
  "filename_index": 0
1660
  },
1661
  {
1662
  "offsets": [
1663
- 1152
 
1664
  ],
1665
  "shape": [
1666
- 1152
 
1667
  ],
1668
  "filename_index": 1
1669
  }
1670
  ]
1671
  },
1672
- "h.11.mlp.c_proj.weight": {
1673
  "type": "Distributed",
1674
  "shape": [
1675
- 3072,
1676
  768
1677
  ],
1678
  "dtype": "F32",
1679
  "chunks": [
1680
  {
1681
  "offsets": [
1682
- 0,
1683
  0
1684
  ],
1685
  "shape": [
1686
- 1536,
1687
- 768
1688
  ],
1689
  "filename_index": 0
1690
  },
1691
  {
1692
  "offsets": [
1693
- 1536,
1694
- 0
1695
  ],
1696
  "shape": [
1697
- 1536,
1698
- 768
1699
  ],
1700
  "filename_index": 1
1701
  }
1702
  ]
1703
  },
1704
- "h.7.attn.c_attn.bias": {
1705
  "type": "Distributed",
1706
  "shape": [
1707
- 2304
1708
  ],
1709
  "dtype": "F32",
1710
  "chunks": [
@@ -1713,22 +1669,22 @@
1713
  0
1714
  ],
1715
  "shape": [
1716
- 1152
1717
  ],
1718
  "filename_index": 0
1719
  },
1720
  {
1721
  "offsets": [
1722
- 1152
1723
  ],
1724
  "shape": [
1725
- 1152
1726
  ],
1727
  "filename_index": 1
1728
  }
1729
  ]
1730
  },
1731
- "h.4.mlp.c_fc.bias": {
1732
  "type": "Distributed",
1733
  "shape": [
1734
  3072
@@ -1755,6 +1711,38 @@
1755
  }
1756
  ]
1757
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1758
  "h.6.mlp.c_proj.bias": {
1759
  "type": "Distributed",
1760
  "shape": [
@@ -1782,39 +1770,34 @@
1782
  }
1783
  ]
1784
  },
1785
- "h.10.attn.c_proj.weight": {
1786
  "type": "Distributed",
1787
  "shape": [
1788
- 768,
1789
- 768
1790
  ],
1791
  "dtype": "F32",
1792
  "chunks": [
1793
  {
1794
  "offsets": [
1795
- 0,
1796
  0
1797
  ],
1798
  "shape": [
1799
- 384,
1800
- 768
1801
  ],
1802
  "filename_index": 0
1803
  },
1804
  {
1805
  "offsets": [
1806
- 384,
1807
- 0
1808
  ],
1809
  "shape": [
1810
- 384,
1811
- 768
1812
  ],
1813
  "filename_index": 1
1814
  }
1815
  ]
1816
  },
1817
- "h.10.ln_1.weight": {
1818
  "type": "Distributed",
1819
  "shape": [
1820
  768
@@ -1841,7 +1824,7 @@
1841
  }
1842
  ]
1843
  },
1844
- "h.5.attn.c_proj.weight": {
1845
  "type": "Distributed",
1846
  "shape": [
1847
  768,
@@ -1873,10 +1856,10 @@
1873
  }
1874
  ]
1875
  },
1876
- "h.8.ln_2.bias": {
1877
  "type": "Distributed",
1878
  "shape": [
1879
- 768
1880
  ],
1881
  "dtype": "F32",
1882
  "chunks": [
@@ -1885,76 +1868,86 @@
1885
  0
1886
  ],
1887
  "shape": [
1888
- 384
1889
  ],
1890
  "filename_index": 0
1891
  },
1892
  {
1893
  "offsets": [
1894
- 384
1895
  ],
1896
  "shape": [
1897
- 384
1898
  ],
1899
  "filename_index": 1
1900
  }
1901
  ]
1902
  },
1903
- "h.10.ln_1.bias": {
1904
  "type": "Distributed",
1905
  "shape": [
 
1906
  768
1907
  ],
1908
  "dtype": "F32",
1909
  "chunks": [
1910
  {
1911
  "offsets": [
 
1912
  0
1913
  ],
1914
  "shape": [
1915
- 384
 
1916
  ],
1917
  "filename_index": 0
1918
  },
1919
  {
1920
  "offsets": [
1921
- 384
 
1922
  ],
1923
  "shape": [
1924
- 384
 
1925
  ],
1926
  "filename_index": 1
1927
  }
1928
  ]
1929
  },
1930
- "h.4.mlp.c_proj.bias": {
1931
  "type": "Distributed",
1932
  "shape": [
 
1933
  768
1934
  ],
1935
  "dtype": "F32",
1936
  "chunks": [
1937
  {
1938
  "offsets": [
 
1939
  0
1940
  ],
1941
  "shape": [
1942
- 384
 
1943
  ],
1944
  "filename_index": 0
1945
  },
1946
  {
1947
  "offsets": [
1948
- 384
 
1949
  ],
1950
  "shape": [
1951
- 384
 
1952
  ],
1953
  "filename_index": 1
1954
  }
1955
  ]
1956
  },
1957
- "h.5.ln_1.bias": {
1958
  "type": "Distributed",
1959
  "shape": [
1960
  768
@@ -1981,7 +1974,7 @@
1981
  }
1982
  ]
1983
  },
1984
- "h.9.ln_2.bias": {
1985
  "type": "Distributed",
1986
  "shape": [
1987
  768
@@ -2008,10 +2001,10 @@
2008
  }
2009
  ]
2010
  },
2011
- "h.1.attn.c_attn.bias": {
2012
  "type": "Distributed",
2013
  "shape": [
2014
- 2304
2015
  ],
2016
  "dtype": "F32",
2017
  "chunks": [
@@ -2020,25 +2013,25 @@
2020
  0
2021
  ],
2022
  "shape": [
2023
- 1152
2024
  ],
2025
  "filename_index": 0
2026
  },
2027
  {
2028
  "offsets": [
2029
- 1152
2030
  ],
2031
  "shape": [
2032
- 1152
2033
  ],
2034
  "filename_index": 1
2035
  }
2036
  ]
2037
  },
2038
- "h.0.attn.c_attn.bias": {
2039
  "type": "Distributed",
2040
  "shape": [
2041
- 2304
2042
  ],
2043
  "dtype": "F32",
2044
  "chunks": [
@@ -2047,103 +2040,118 @@
2047
  0
2048
  ],
2049
  "shape": [
2050
- 1152
2051
  ],
2052
  "filename_index": 0
2053
  },
2054
  {
2055
  "offsets": [
2056
- 1152
2057
  ],
2058
  "shape": [
2059
- 1152
2060
  ],
2061
  "filename_index": 1
2062
  }
2063
  ]
2064
  },
2065
- "h.9.ln_2.weight": {
2066
  "type": "Distributed",
2067
  "shape": [
2068
- 768
 
2069
  ],
2070
  "dtype": "F32",
2071
  "chunks": [
2072
  {
2073
  "offsets": [
 
2074
  0
2075
  ],
2076
  "shape": [
2077
- 384
 
2078
  ],
2079
  "filename_index": 0
2080
  },
2081
  {
2082
  "offsets": [
2083
- 384
2084
- ],
 
2085
  "shape": [
2086
- 384
 
2087
  ],
2088
  "filename_index": 1
2089
  }
2090
  ]
2091
  },
2092
- "h.6.ln_2.weight": {
2093
  "type": "Distributed",
2094
  "shape": [
 
2095
  768
2096
  ],
2097
  "dtype": "F32",
2098
  "chunks": [
2099
  {
2100
  "offsets": [
 
2101
  0
2102
  ],
2103
  "shape": [
2104
- 384
 
2105
  ],
2106
  "filename_index": 0
2107
  },
2108
  {
2109
  "offsets": [
2110
- 384
 
2111
  ],
2112
  "shape": [
2113
- 384
 
2114
  ],
2115
  "filename_index": 1
2116
  }
2117
  ]
2118
  },
2119
- "h.10.ln_2.bias": {
2120
  "type": "Distributed",
2121
  "shape": [
2122
- 768
 
2123
  ],
2124
  "dtype": "F32",
2125
  "chunks": [
2126
  {
2127
  "offsets": [
 
2128
  0
2129
  ],
2130
  "shape": [
2131
- 384
 
2132
  ],
2133
  "filename_index": 0
2134
  },
2135
  {
2136
  "offsets": [
2137
- 384
 
2138
  ],
2139
  "shape": [
2140
- 384
 
2141
  ],
2142
  "filename_index": 1
2143
  }
2144
  ]
2145
  },
2146
- "h.10.mlp.c_proj.bias": {
2147
  "type": "Distributed",
2148
  "shape": [
2149
  768
@@ -2170,66 +2178,76 @@
2170
  }
2171
  ]
2172
  },
2173
- "h.2.ln_1.bias": {
2174
  "type": "Distributed",
2175
  "shape": [
2176
- 768
 
 
 
2177
  ],
2178
  "dtype": "F32",
2179
  "chunks": [
2180
  {
2181
  "offsets": [
 
 
 
2182
  0
2183
  ],
2184
  "shape": [
2185
- 384
 
 
 
2186
  ],
2187
  "filename_index": 0
2188
  },
2189
  {
2190
  "offsets": [
2191
- 384
 
 
 
2192
  ],
2193
  "shape": [
2194
- 384
 
 
 
2195
  ],
2196
  "filename_index": 1
2197
  }
2198
  ]
2199
  },
2200
- "h.7.mlp.c_proj.weight": {
2201
  "type": "Distributed",
2202
  "shape": [
2203
- 3072,
2204
  768
2205
  ],
2206
  "dtype": "F32",
2207
  "chunks": [
2208
  {
2209
  "offsets": [
2210
- 0,
2211
  0
2212
  ],
2213
  "shape": [
2214
- 1536,
2215
- 768
2216
  ],
2217
  "filename_index": 0
2218
  },
2219
  {
2220
  "offsets": [
2221
- 1536,
2222
- 0
2223
  ],
2224
  "shape": [
2225
- 1536,
2226
- 768
2227
  ],
2228
  "filename_index": 1
2229
  }
2230
  ]
2231
  },
2232
- "h.3.mlp.c_proj.bias": {
2233
  "type": "Distributed",
2234
  "shape": [
2235
  768
@@ -2256,7 +2274,7 @@
2256
  }
2257
  ]
2258
  },
2259
- "h.1.ln_1.bias": {
2260
  "type": "Distributed",
2261
  "shape": [
2262
  768
@@ -2283,7 +2301,7 @@
2283
  }
2284
  ]
2285
  },
2286
- "h.0.ln_2.bias": {
2287
  "type": "Distributed",
2288
  "shape": [
2289
  768
@@ -2310,7 +2328,7 @@
2310
  }
2311
  ]
2312
  },
2313
- "h.4.ln_2.bias": {
2314
  "type": "Distributed",
2315
  "shape": [
2316
  768
@@ -2337,145 +2355,88 @@
2337
  }
2338
  ]
2339
  },
2340
- "h.6.attn.c_attn.weight": {
2341
  "type": "Distributed",
2342
  "shape": [
2343
- 768,
2344
  2304
2345
  ],
2346
  "dtype": "F32",
2347
  "chunks": [
2348
  {
2349
  "offsets": [
2350
- 0,
2351
  0
2352
  ],
2353
  "shape": [
2354
- 768,
2355
  1152
2356
  ],
2357
  "filename_index": 0
2358
  },
2359
  {
2360
  "offsets": [
2361
- 0,
2362
  1152
2363
  ],
2364
  "shape": [
2365
- 768,
2366
  1152
2367
  ],
2368
  "filename_index": 1
2369
  }
2370
  ]
2371
  },
2372
- "h.6.attn.bias": {
2373
- "type": "Distributed",
2374
- "shape": [
2375
- 1,
2376
- 1,
2377
- 1024,
2378
- 1024
2379
- ],
2380
- "dtype": "F32",
2381
- "chunks": [
2382
- {
2383
- "offsets": [
2384
- 0,
2385
- 0,
2386
- 0,
2387
- 0
2388
- ],
2389
- "shape": [
2390
- 1,
2391
- 1,
2392
- 1024,
2393
- 512
2394
- ],
2395
- "filename_index": 0
2396
- },
2397
- {
2398
- "offsets": [
2399
- 0,
2400
- 0,
2401
- 0,
2402
- 512
2403
- ],
2404
- "shape": [
2405
- 1,
2406
- 1,
2407
- 1024,
2408
- 512
2409
- ],
2410
- "filename_index": 1
2411
- }
2412
- ]
2413
- },
2414
- "h.9.mlp.c_proj.weight": {
2415
  "type": "Distributed",
2416
  "shape": [
2417
- 3072,
2418
  768
2419
  ],
2420
  "dtype": "F32",
2421
  "chunks": [
2422
  {
2423
  "offsets": [
2424
- 0,
2425
  0
2426
  ],
2427
  "shape": [
2428
- 1536,
2429
- 768
2430
  ],
2431
  "filename_index": 0
2432
  },
2433
  {
2434
  "offsets": [
2435
- 1536,
2436
- 0
2437
  ],
2438
  "shape": [
2439
- 1536,
2440
- 768
2441
  ],
2442
  "filename_index": 1
2443
  }
2444
  ]
2445
  },
2446
- "h.7.attn.c_attn.weight": {
2447
  "type": "Distributed",
2448
  "shape": [
2449
- 768,
2450
- 2304
2451
  ],
2452
  "dtype": "F32",
2453
  "chunks": [
2454
  {
2455
  "offsets": [
2456
- 0,
2457
  0
2458
  ],
2459
  "shape": [
2460
- 768,
2461
- 1152
2462
  ],
2463
  "filename_index": 0
2464
  },
2465
  {
2466
  "offsets": [
2467
- 0,
2468
- 1152
2469
  ],
2470
  "shape": [
2471
- 768,
2472
- 1152
2473
  ],
2474
  "filename_index": 1
2475
  }
2476
  ]
2477
  },
2478
- "h.3.ln_2.weight": {
2479
  "type": "Distributed",
2480
  "shape": [
2481
  768
@@ -2502,37 +2463,52 @@
2502
  }
2503
  ]
2504
  },
2505
- "h.0.ln_2.weight": {
2506
  "type": "Distributed",
2507
  "shape": [
2508
- 768
 
 
 
2509
  ],
2510
  "dtype": "F32",
2511
  "chunks": [
2512
  {
2513
  "offsets": [
 
 
 
2514
  0
2515
  ],
2516
  "shape": [
2517
- 384
 
 
 
2518
  ],
2519
  "filename_index": 0
2520
  },
2521
  {
2522
  "offsets": [
2523
- 384
 
 
 
2524
  ],
2525
  "shape": [
2526
- 384
 
 
 
2527
  ],
2528
  "filename_index": 1
2529
  }
2530
  ]
2531
  },
2532
- "h.3.ln_1.weight": {
2533
  "type": "Distributed",
2534
  "shape": [
2535
- 768
2536
  ],
2537
  "dtype": "F32",
2538
  "chunks": [
@@ -2541,25 +2517,25 @@
2541
  0
2542
  ],
2543
  "shape": [
2544
- 384
2545
  ],
2546
  "filename_index": 0
2547
  },
2548
  {
2549
  "offsets": [
2550
- 384
2551
  ],
2552
  "shape": [
2553
- 384
2554
  ],
2555
  "filename_index": 1
2556
  }
2557
  ]
2558
  },
2559
- "h.11.attn.c_attn.bias": {
2560
  "type": "Distributed",
2561
  "shape": [
2562
- 2304
2563
  ],
2564
  "dtype": "F32",
2565
  "chunks": [
@@ -2568,49 +2544,54 @@
2568
  0
2569
  ],
2570
  "shape": [
2571
- 1152
2572
  ],
2573
  "filename_index": 0
2574
  },
2575
  {
2576
  "offsets": [
2577
- 1152
2578
  ],
2579
  "shape": [
2580
- 1152
2581
  ],
2582
  "filename_index": 1
2583
  }
2584
  ]
2585
  },
2586
- "h.1.ln_1.weight": {
2587
  "type": "Distributed",
2588
  "shape": [
2589
- 768
 
2590
  ],
2591
  "dtype": "F32",
2592
  "chunks": [
2593
  {
2594
  "offsets": [
 
2595
  0
2596
  ],
2597
  "shape": [
2598
- 384
 
2599
  ],
2600
  "filename_index": 0
2601
  },
2602
  {
2603
  "offsets": [
2604
- 384
 
2605
  ],
2606
  "shape": [
2607
- 384
 
2608
  ],
2609
  "filename_index": 1
2610
  }
2611
  ]
2612
  },
2613
- "h.8.attn.bias": {
2614
  "type": "Distributed",
2615
  "shape": [
2616
  1,
@@ -2652,34 +2633,39 @@
2652
  }
2653
  ]
2654
  },
2655
- "h.2.attn.c_proj.bias": {
2656
  "type": "Distributed",
2657
  "shape": [
2658
- 768
 
2659
  ],
2660
  "dtype": "F32",
2661
  "chunks": [
2662
  {
2663
  "offsets": [
 
2664
  0
2665
  ],
2666
  "shape": [
2667
- 384
 
2668
  ],
2669
  "filename_index": 0
2670
  },
2671
  {
2672
  "offsets": [
2673
- 384
 
2674
  ],
2675
  "shape": [
2676
- 384
 
2677
  ],
2678
  "filename_index": 1
2679
  }
2680
  ]
2681
  },
2682
- "h.7.ln_2.bias": {
2683
  "type": "Distributed",
2684
  "shape": [
2685
  768
@@ -2706,11 +2692,11 @@
2706
  }
2707
  ]
2708
  },
2709
- "h.7.mlp.c_fc.weight": {
2710
  "type": "Distributed",
2711
  "shape": [
2712
  768,
2713
- 3072
2714
  ],
2715
  "dtype": "F32",
2716
  "chunks": [
@@ -2721,66 +2707,51 @@
2721
  ],
2722
  "shape": [
2723
  768,
2724
- 1536
2725
  ],
2726
  "filename_index": 0
2727
  },
2728
  {
2729
  "offsets": [
2730
  0,
2731
- 1536
2732
  ],
2733
  "shape": [
2734
  768,
2735
- 1536
2736
  ],
2737
  "filename_index": 1
2738
  }
2739
  ]
2740
  },
2741
- "h.10.attn.bias": {
2742
  "type": "Distributed",
2743
  "shape": [
2744
- 1,
2745
- 1,
2746
- 1024,
2747
- 1024
2748
  ],
2749
  "dtype": "F32",
2750
  "chunks": [
2751
  {
2752
  "offsets": [
2753
- 0,
2754
- 0,
2755
- 0,
2756
  0
2757
  ],
2758
  "shape": [
2759
- 1,
2760
- 1,
2761
- 1024,
2762
- 512
2763
  ],
2764
  "filename_index": 0
2765
  },
2766
  {
2767
  "offsets": [
2768
- 0,
2769
- 0,
2770
- 0,
2771
- 512
2772
  ],
2773
  "shape": [
2774
- 1,
2775
- 1,
2776
- 1024,
2777
- 512
2778
  ],
2779
  "filename_index": 1
2780
  }
2781
  ]
2782
  },
2783
- "h.7.ln_1.bias": {
2784
  "type": "Distributed",
2785
  "shape": [
2786
  768
@@ -2807,53 +2778,38 @@
2807
  }
2808
  ]
2809
  },
2810
- "h.0.attn.bias": {
2811
  "type": "Distributed",
2812
  "shape": [
2813
- 1,
2814
- 1,
2815
- 1024,
2816
- 1024
2817
  ],
2818
  "dtype": "F32",
2819
  "chunks": [
2820
  {
2821
  "offsets": [
2822
- 0,
2823
- 0,
2824
- 0,
2825
  0
2826
  ],
2827
  "shape": [
2828
- 1,
2829
- 1,
2830
- 1024,
2831
- 512
2832
  ],
2833
  "filename_index": 0
2834
  },
2835
  {
2836
  "offsets": [
2837
- 0,
2838
- 0,
2839
- 0,
2840
- 512
2841
  ],
2842
  "shape": [
2843
- 1,
2844
- 1,
2845
- 1024,
2846
- 512
2847
  ],
2848
  "filename_index": 1
2849
  }
2850
  ]
2851
  },
2852
- "h.6.mlp.c_fc.weight": {
2853
  "type": "Distributed",
2854
  "shape": [
2855
- 768,
2856
- 3072
2857
  ],
2858
  "dtype": "F32",
2859
  "chunks": [
@@ -2863,55 +2819,60 @@
2863
  0
2864
  ],
2865
  "shape": [
2866
- 768,
2867
- 1536
2868
  ],
2869
  "filename_index": 0
2870
  },
2871
  {
2872
  "offsets": [
2873
- 0,
2874
- 1536
2875
  ],
2876
  "shape": [
2877
- 768,
2878
- 1536
2879
  ],
2880
  "filename_index": 1
2881
  }
2882
  ]
2883
  },
2884
- "h.7.ln_1.weight": {
2885
  "type": "Distributed",
2886
  "shape": [
2887
- 768
 
2888
  ],
2889
  "dtype": "F32",
2890
  "chunks": [
2891
  {
2892
  "offsets": [
 
2893
  0
2894
  ],
2895
  "shape": [
2896
- 384
 
2897
  ],
2898
  "filename_index": 0
2899
  },
2900
  {
2901
  "offsets": [
2902
- 384
 
2903
  ],
2904
  "shape": [
2905
- 384
 
2906
  ],
2907
  "filename_index": 1
2908
  }
2909
  ]
2910
  },
2911
- "h.3.mlp.c_fc.bias": {
2912
  "type": "Distributed",
2913
  "shape": [
2914
- 3072
2915
  ],
2916
  "dtype": "F32",
2917
  "chunks": [
@@ -2920,49 +2881,54 @@
2920
  0
2921
  ],
2922
  "shape": [
2923
- 1536
2924
  ],
2925
  "filename_index": 0
2926
  },
2927
  {
2928
  "offsets": [
2929
- 1536
2930
  ],
2931
  "shape": [
2932
- 1536
2933
  ],
2934
  "filename_index": 1
2935
  }
2936
  ]
2937
  },
2938
- "h.5.attn.c_proj.bias": {
2939
  "type": "Distributed",
2940
  "shape": [
2941
- 768
 
2942
  ],
2943
  "dtype": "F32",
2944
  "chunks": [
2945
  {
2946
  "offsets": [
 
2947
  0
2948
  ],
2949
  "shape": [
2950
- 384
 
2951
  ],
2952
  "filename_index": 0
2953
  },
2954
  {
2955
  "offsets": [
2956
- 384
 
2957
  ],
2958
  "shape": [
2959
- 384
 
2960
  ],
2961
  "filename_index": 1
2962
  }
2963
  ]
2964
  },
2965
- "h.11.attn.bias": {
2966
  "type": "Distributed",
2967
  "shape": [
2968
  1,
@@ -3004,11 +2970,11 @@
3004
  }
3005
  ]
3006
  },
3007
- "h.8.attn.c_attn.weight": {
3008
  "type": "Distributed",
3009
  "shape": [
3010
- 768,
3011
- 2304
3012
  ],
3013
  "dtype": "F32",
3014
  "chunks": [
@@ -3018,25 +2984,25 @@
3018
  0
3019
  ],
3020
  "shape": [
3021
- 768,
3022
- 1152
3023
  ],
3024
  "filename_index": 0
3025
  },
3026
  {
3027
  "offsets": [
3028
- 0,
3029
- 1152
3030
  ],
3031
  "shape": [
3032
- 768,
3033
- 1152
3034
  ],
3035
  "filename_index": 1
3036
  }
3037
  ]
3038
  },
3039
- "h.9.attn.c_proj.bias": {
3040
  "type": "Distributed",
3041
  "shape": [
3042
  768
@@ -3063,49 +3029,34 @@
3063
  }
3064
  ]
3065
  },
3066
- "h.7.attn.bias": {
3067
  "type": "Distributed",
3068
  "shape": [
3069
- 1,
3070
- 1,
3071
- 1024,
3072
- 1024
3073
  ],
3074
  "dtype": "F32",
3075
  "chunks": [
3076
  {
3077
  "offsets": [
3078
- 0,
3079
- 0,
3080
- 0,
3081
  0
3082
  ],
3083
  "shape": [
3084
- 1,
3085
- 1,
3086
- 1024,
3087
- 512
3088
  ],
3089
  "filename_index": 0
3090
  },
3091
  {
3092
  "offsets": [
3093
- 0,
3094
- 0,
3095
- 0,
3096
- 512
3097
  ],
3098
  "shape": [
3099
- 1,
3100
- 1,
3101
- 1024,
3102
- 512
3103
  ],
3104
  "filename_index": 1
3105
  }
3106
  ]
3107
  },
3108
- "h.3.attn.c_proj.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
  768,
@@ -3137,61 +3088,81 @@
3137
  }
3138
  ]
3139
  },
3140
- "h.11.mlp.c_proj.bias": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
- 768
 
 
 
3144
  ],
3145
  "dtype": "F32",
3146
  "chunks": [
3147
  {
3148
  "offsets": [
 
 
 
3149
  0
3150
  ],
3151
  "shape": [
3152
- 384
 
 
 
3153
  ],
3154
  "filename_index": 0
3155
  },
3156
  {
3157
  "offsets": [
3158
- 384
 
 
 
3159
  ],
3160
  "shape": [
3161
- 384
 
 
 
3162
  ],
3163
  "filename_index": 1
3164
  }
3165
  ]
3166
  },
3167
- "h.3.attn.c_proj.bias": {
3168
  "type": "Distributed",
3169
  "shape": [
 
3170
  768
3171
  ],
3172
  "dtype": "F32",
3173
  "chunks": [
3174
  {
3175
  "offsets": [
 
3176
  0
3177
  ],
3178
  "shape": [
3179
- 384
 
3180
  ],
3181
  "filename_index": 0
3182
  },
3183
  {
3184
  "offsets": [
3185
- 384
 
3186
  ],
3187
  "shape": [
3188
- 384
 
3189
  ],
3190
  "filename_index": 1
3191
  }
3192
  ]
3193
  },
3194
- "h.7.ln_2.weight": {
3195
  "type": "Distributed",
3196
  "shape": [
3197
  768
@@ -3218,64 +3189,74 @@
3218
  }
3219
  ]
3220
  },
3221
- "h.9.ln_1.bias": {
3222
  "type": "Distributed",
3223
  "shape": [
 
3224
  768
3225
  ],
3226
  "dtype": "F32",
3227
  "chunks": [
3228
  {
3229
  "offsets": [
 
3230
  0
3231
  ],
3232
  "shape": [
3233
- 384
 
3234
  ],
3235
  "filename_index": 0
3236
  },
3237
  {
3238
  "offsets": [
3239
- 384
 
3240
  ],
3241
  "shape": [
3242
- 384
 
3243
  ],
3244
  "filename_index": 1
3245
  }
3246
  ]
3247
  },
3248
- "h.11.ln_1.weight": {
3249
  "type": "Distributed",
3250
  "shape": [
 
3251
  768
3252
  ],
3253
  "dtype": "F32",
3254
  "chunks": [
3255
  {
3256
  "offsets": [
 
3257
  0
3258
  ],
3259
  "shape": [
 
3260
  384
3261
  ],
3262
  "filename_index": 0
3263
  },
3264
  {
3265
  "offsets": [
 
3266
  384
3267
  ],
3268
  "shape": [
 
3269
  384
3270
  ],
3271
  "filename_index": 1
3272
  }
3273
  ]
3274
  },
3275
- "h.5.mlp.c_fc.bias": {
3276
  "type": "Distributed",
3277
  "shape": [
3278
- 3072
3279
  ],
3280
  "dtype": "F32",
3281
  "chunks": [
@@ -3284,25 +3265,25 @@
3284
  0
3285
  ],
3286
  "shape": [
3287
- 1536
3288
  ],
3289
  "filename_index": 0
3290
  },
3291
  {
3292
  "offsets": [
3293
- 1536
3294
  ],
3295
  "shape": [
3296
- 1536
3297
  ],
3298
  "filename_index": 1
3299
  }
3300
  ]
3301
  },
3302
- "h.1.ln_2.bias": {
3303
  "type": "Distributed",
3304
  "shape": [
3305
- 768
3306
  ],
3307
  "dtype": "F32",
3308
  "chunks": [
@@ -3311,80 +3292,100 @@
3311
  0
3312
  ],
3313
  "shape": [
3314
- 384
3315
  ],
3316
  "filename_index": 0
3317
  },
3318
  {
3319
  "offsets": [
3320
- 384
3321
  ],
3322
  "shape": [
3323
- 384
3324
  ],
3325
  "filename_index": 1
3326
  }
3327
  ]
3328
  },
3329
- "h.8.attn.c_attn.bias": {
3330
  "type": "Distributed",
3331
  "shape": [
3332
- 2304
 
3333
  ],
3334
  "dtype": "F32",
3335
  "chunks": [
3336
  {
3337
  "offsets": [
 
3338
  0
3339
  ],
3340
  "shape": [
3341
- 1152
 
3342
  ],
3343
  "filename_index": 0
3344
  },
3345
  {
3346
  "offsets": [
3347
- 1152
 
3348
  ],
3349
  "shape": [
3350
- 1152
 
3351
  ],
3352
  "filename_index": 1
3353
  }
3354
  ]
3355
  },
3356
- "h.8.mlp.c_proj.bias": {
3357
  "type": "Distributed",
3358
  "shape": [
3359
- 768
 
 
 
3360
  ],
3361
  "dtype": "F32",
3362
  "chunks": [
3363
  {
3364
  "offsets": [
 
 
 
3365
  0
3366
  ],
3367
  "shape": [
3368
- 384
 
 
 
3369
  ],
3370
  "filename_index": 0
3371
  },
3372
  {
3373
  "offsets": [
3374
- 384
 
 
 
3375
  ],
3376
  "shape": [
3377
- 384
 
 
 
3378
  ],
3379
  "filename_index": 1
3380
  }
3381
  ]
3382
  },
3383
- "h.5.attn.c_attn.weight": {
3384
  "type": "Distributed",
3385
  "shape": [
3386
  768,
3387
- 2304
3388
  ],
3389
  "dtype": "F32",
3390
  "chunks": [
@@ -3395,66 +3396,78 @@
3395
  ],
3396
  "shape": [
3397
  768,
3398
- 1152
3399
  ],
3400
  "filename_index": 0
3401
  },
3402
  {
3403
  "offsets": [
3404
  0,
3405
- 1152
3406
  ],
3407
  "shape": [
3408
  768,
3409
- 1152
3410
  ],
3411
  "filename_index": 1
3412
  }
3413
  ]
3414
  },
3415
- "h.4.attn.bias": {
3416
  "type": "Distributed",
3417
  "shape": [
3418
- 1,
3419
- 1,
3420
- 1024,
3421
- 1024
3422
  ],
3423
  "dtype": "F32",
3424
  "chunks": [
3425
  {
3426
  "offsets": [
3427
- 0,
3428
- 0,
3429
- 0,
3430
  0
3431
  ],
3432
  "shape": [
3433
- 1,
3434
- 1,
3435
- 1024,
3436
- 512
3437
  ],
3438
  "filename_index": 0
3439
  },
3440
  {
3441
  "offsets": [
3442
- 0,
3443
- 0,
3444
- 0,
3445
- 512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3446
  ],
3447
  "shape": [
3448
- 1,
3449
- 1,
3450
- 1024,
3451
- 512
3452
  ],
3453
  "filename_index": 1
3454
  }
3455
  ]
3456
  },
3457
- "h.5.mlp.c_proj.bias": {
3458
  "type": "Distributed",
3459
  "shape": [
3460
  768
@@ -3481,37 +3494,42 @@
3481
  }
3482
  ]
3483
  },
3484
- "h.0.mlp.c_fc.bias": {
3485
  "type": "Distributed",
3486
  "shape": [
3487
- 3072
 
3488
  ],
3489
  "dtype": "F32",
3490
  "chunks": [
3491
  {
3492
  "offsets": [
 
3493
  0
3494
  ],
3495
  "shape": [
3496
- 1536
 
3497
  ],
3498
  "filename_index": 0
3499
  },
3500
  {
3501
  "offsets": [
3502
- 1536
 
3503
  ],
3504
  "shape": [
3505
- 1536
 
3506
  ],
3507
  "filename_index": 1
3508
  }
3509
  ]
3510
  },
3511
- "h.1.mlp.c_proj.weight": {
3512
  "type": "Distributed",
3513
  "shape": [
3514
- 3072,
3515
  768
3516
  ],
3517
  "dtype": "F32",
@@ -3522,168 +3540,143 @@
3522
  0
3523
  ],
3524
  "shape": [
3525
- 1536,
3526
  768
3527
  ],
3528
  "filename_index": 0
3529
  },
3530
  {
3531
  "offsets": [
3532
- 1536,
3533
  0
3534
  ],
3535
  "shape": [
3536
- 1536,
3537
  768
3538
  ],
3539
  "filename_index": 1
3540
  }
3541
  ]
3542
  },
3543
- "h.6.ln_1.weight": {
3544
  "type": "Distributed",
3545
  "shape": [
3546
- 768
 
3547
  ],
3548
  "dtype": "F32",
3549
  "chunks": [
3550
  {
3551
  "offsets": [
 
3552
  0
3553
  ],
3554
  "shape": [
3555
- 384
 
3556
  ],
3557
  "filename_index": 0
3558
  },
3559
  {
3560
  "offsets": [
3561
- 384
 
3562
  ],
3563
  "shape": [
3564
- 384
 
3565
  ],
3566
  "filename_index": 1
3567
  }
3568
  ]
3569
  },
3570
- "h.5.attn.bias": {
3571
  "type": "Distributed",
3572
  "shape": [
3573
- 1,
3574
- 1,
3575
- 1024,
3576
- 1024
3577
  ],
3578
  "dtype": "F32",
3579
  "chunks": [
3580
  {
3581
  "offsets": [
3582
- 0,
3583
- 0,
3584
  0,
3585
  0
3586
  ],
3587
  "shape": [
3588
- 1,
3589
- 1,
3590
- 1024,
3591
- 512
3592
  ],
3593
  "filename_index": 0
3594
  },
3595
  {
3596
  "offsets": [
3597
  0,
3598
- 0,
3599
- 0,
3600
- 512
3601
  ],
3602
  "shape": [
3603
- 1,
3604
- 1,
3605
- 1024,
3606
- 512
3607
  ],
3608
  "filename_index": 1
3609
  }
3610
  ]
3611
  },
3612
- "h.2.attn.bias": {
3613
  "type": "Distributed",
3614
  "shape": [
3615
- 1,
3616
- 1,
3617
- 1024,
3618
- 1024
3619
  ],
3620
  "dtype": "F32",
3621
  "chunks": [
3622
  {
3623
  "offsets": [
3624
- 0,
3625
- 0,
3626
- 0,
3627
  0
3628
  ],
3629
  "shape": [
3630
- 1,
3631
- 1,
3632
- 1024,
3633
- 512
3634
  ],
3635
  "filename_index": 0
3636
  },
3637
  {
3638
  "offsets": [
3639
- 0,
3640
- 0,
3641
- 0,
3642
- 512
3643
  ],
3644
  "shape": [
3645
- 1,
3646
- 1,
3647
- 1024,
3648
- 512
3649
  ],
3650
  "filename_index": 1
3651
  }
3652
  ]
3653
  },
3654
- "h.8.mlp.c_proj.weight": {
3655
  "type": "Distributed",
3656
  "shape": [
3657
- 3072,
3658
  768
3659
  ],
3660
  "dtype": "F32",
3661
  "chunks": [
3662
  {
3663
  "offsets": [
3664
- 0,
3665
  0
3666
  ],
3667
  "shape": [
3668
- 1536,
3669
- 768
3670
  ],
3671
  "filename_index": 0
3672
  },
3673
  {
3674
  "offsets": [
3675
- 1536,
3676
- 0
3677
  ],
3678
  "shape": [
3679
- 1536,
3680
- 768
3681
  ],
3682
  "filename_index": 1
3683
  }
3684
  ]
3685
  },
3686
- "h.0.ln_1.bias": {
3687
  "type": "Distributed",
3688
  "shape": [
3689
  768
@@ -3710,34 +3703,39 @@
3710
  }
3711
  ]
3712
  },
3713
- "h.5.attn.c_attn.bias": {
3714
  "type": "Distributed",
3715
  "shape": [
3716
- 2304
 
3717
  ],
3718
  "dtype": "F32",
3719
  "chunks": [
3720
  {
3721
  "offsets": [
 
3722
  0
3723
  ],
3724
  "shape": [
3725
- 1152
 
3726
  ],
3727
  "filename_index": 0
3728
  },
3729
  {
3730
  "offsets": [
3731
- 1152
 
3732
  ],
3733
  "shape": [
3734
- 1152
 
3735
  ],
3736
  "filename_index": 1
3737
  }
3738
  ]
3739
  },
3740
- "h.11.ln_1.bias": {
3741
  "type": "Distributed",
3742
  "shape": [
3743
  768
@@ -3764,71 +3762,61 @@
3764
  }
3765
  ]
3766
  },
3767
- "h.11.attn.c_proj.weight": {
3768
  "type": "Distributed",
3769
  "shape": [
3770
- 768,
3771
- 768
3772
  ],
3773
  "dtype": "F32",
3774
  "chunks": [
3775
  {
3776
  "offsets": [
3777
- 0,
3778
  0
3779
  ],
3780
  "shape": [
3781
- 384,
3782
- 768
3783
  ],
3784
  "filename_index": 0
3785
  },
3786
  {
3787
  "offsets": [
3788
- 384,
3789
- 0
3790
  ],
3791
  "shape": [
3792
- 384,
3793
- 768
3794
  ],
3795
  "filename_index": 1
3796
  }
3797
  ]
3798
  },
3799
- "h.10.attn.c_attn.weight": {
3800
  "type": "Distributed",
3801
  "shape": [
3802
- 768,
3803
- 2304
3804
  ],
3805
  "dtype": "F32",
3806
  "chunks": [
3807
  {
3808
  "offsets": [
3809
- 0,
3810
  0
3811
  ],
3812
  "shape": [
3813
- 768,
3814
- 1152
3815
  ],
3816
  "filename_index": 0
3817
  },
3818
  {
3819
  "offsets": [
3820
- 0,
3821
- 1152
3822
  ],
3823
  "shape": [
3824
- 768,
3825
- 1152
3826
  ],
3827
  "filename_index": 1
3828
  }
3829
  ]
3830
  },
3831
- "h.1.mlp.c_proj.bias": {
3832
  "type": "Distributed",
3833
  "shape": [
3834
  768
@@ -3855,7 +3843,7 @@
3855
  }
3856
  ]
3857
  },
3858
- "h.3.ln_1.bias": {
3859
  "type": "Distributed",
3860
  "shape": [
3861
  768
@@ -3882,34 +3870,49 @@
3882
  }
3883
  ]
3884
  },
3885
- "h.6.ln_2.bias": {
3886
  "type": "Distributed",
3887
  "shape": [
3888
- 768
 
 
 
3889
  ],
3890
  "dtype": "F32",
3891
  "chunks": [
3892
  {
3893
  "offsets": [
 
 
 
3894
  0
3895
  ],
3896
  "shape": [
3897
- 384
 
 
 
3898
  ],
3899
  "filename_index": 0
3900
  },
3901
  {
3902
  "offsets": [
3903
- 384
 
 
 
3904
  ],
3905
  "shape": [
3906
- 384
 
 
 
3907
  ],
3908
  "filename_index": 1
3909
  }
3910
  ]
3911
  },
3912
- "h.2.ln_1.weight": {
3913
  "type": "Distributed",
3914
  "shape": [
3915
  768
@@ -3936,7 +3939,7 @@
3936
  }
3937
  ]
3938
  },
3939
- "h.4.ln_1.weight": {
3940
  "type": "Distributed",
3941
  "shape": [
3942
  768
@@ -3963,7 +3966,7 @@
3963
  }
3964
  ]
3965
  },
3966
- "h.10.ln_2.weight": {
3967
  "type": "Distributed",
3968
  "shape": [
3969
  768
@@ -3990,39 +3993,34 @@
3990
  }
3991
  ]
3992
  },
3993
- "h.2.attn.c_proj.weight": {
3994
  "type": "Distributed",
3995
  "shape": [
3996
- 768,
3997
  768
3998
  ],
3999
  "dtype": "F32",
4000
  "chunks": [
4001
  {
4002
  "offsets": [
4003
- 0,
4004
  0
4005
  ],
4006
  "shape": [
4007
- 384,
4008
- 768
4009
  ],
4010
  "filename_index": 0
4011
  },
4012
  {
4013
  "offsets": [
4014
- 384,
4015
- 0
4016
  ],
4017
  "shape": [
4018
- 384,
4019
- 768
4020
  ],
4021
  "filename_index": 1
4022
  }
4023
  ]
4024
  },
4025
- "h.11.ln_2.weight": {
4026
  "type": "Distributed",
4027
  "shape": [
4028
  768
@@ -4049,81 +4047,93 @@
4049
  }
4050
  ]
4051
  },
4052
- "h.8.attn.c_proj.weight": {
4053
  "type": "Distributed",
4054
  "shape": [
4055
- 768,
4056
- 768
4057
  ],
4058
  "dtype": "F32",
4059
  "chunks": [
4060
  {
4061
  "offsets": [
4062
- 0,
4063
  0
4064
  ],
4065
  "shape": [
4066
- 384,
4067
- 768
4068
  ],
4069
  "filename_index": 0
4070
  },
4071
  {
4072
  "offsets": [
4073
- 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4074
  0
4075
  ],
4076
  "shape": [
4077
- 384,
4078
- 768
 
 
 
 
 
 
 
 
4079
  ],
4080
  "filename_index": 1
4081
  }
4082
  ]
4083
  },
4084
- "h.9.attn.bias": {
4085
  "type": "Distributed",
4086
  "shape": [
4087
- 1,
4088
- 1,
4089
- 1024,
4090
- 1024
4091
  ],
4092
  "dtype": "F32",
4093
  "chunks": [
4094
  {
4095
  "offsets": [
4096
- 0,
4097
- 0,
4098
  0,
4099
  0
4100
  ],
4101
  "shape": [
4102
- 1,
4103
- 1,
4104
- 1024,
4105
- 512
4106
  ],
4107
  "filename_index": 0
4108
  },
4109
  {
4110
  "offsets": [
4111
  0,
4112
- 0,
4113
- 0,
4114
- 512
4115
  ],
4116
  "shape": [
4117
- 1,
4118
- 1,
4119
- 1024,
4120
- 512
4121
  ],
4122
  "filename_index": 1
4123
  }
4124
  ]
4125
  },
4126
- "h.1.ln_2.weight": {
4127
  "type": "Distributed",
4128
  "shape": [
4129
  768
@@ -4150,7 +4160,7 @@
4150
  }
4151
  ]
4152
  },
4153
- "h.4.ln_2.weight": {
4154
  "type": "Distributed",
4155
  "shape": [
4156
  768
@@ -4177,42 +4187,37 @@
4177
  }
4178
  ]
4179
  },
4180
- "h.3.attn.c_attn.weight": {
4181
  "type": "Distributed",
4182
  "shape": [
4183
- 768,
4184
- 2304
4185
  ],
4186
  "dtype": "F32",
4187
  "chunks": [
4188
  {
4189
  "offsets": [
4190
- 0,
4191
  0
4192
  ],
4193
  "shape": [
4194
- 768,
4195
- 1152
4196
  ],
4197
  "filename_index": 0
4198
  },
4199
  {
4200
  "offsets": [
4201
- 0,
4202
- 1152
4203
  ],
4204
  "shape": [
4205
- 768,
4206
- 1152
4207
  ],
4208
  "filename_index": 1
4209
  }
4210
  ]
4211
  },
4212
- "h.8.mlp.c_fc.bias": {
4213
  "type": "Distributed",
4214
  "shape": [
4215
- 3072
4216
  ],
4217
  "dtype": "F32",
4218
  "chunks": [
@@ -4221,22 +4226,22 @@
4221
  0
4222
  ],
4223
  "shape": [
4224
- 1536
4225
  ],
4226
  "filename_index": 0
4227
  },
4228
  {
4229
  "offsets": [
4230
- 1536
4231
  ],
4232
  "shape": [
4233
- 1536
4234
  ],
4235
  "filename_index": 1
4236
  }
4237
  ]
4238
  },
4239
- "h.10.attn.c_proj.bias": {
4240
  "type": "Distributed",
4241
  "shape": [
4242
  768
@@ -4263,39 +4268,34 @@
4263
  }
4264
  ]
4265
  },
4266
- "h.9.attn.c_attn.weight": {
4267
  "type": "Distributed",
4268
  "shape": [
4269
- 768,
4270
- 2304
4271
  ],
4272
  "dtype": "F32",
4273
  "chunks": [
4274
  {
4275
  "offsets": [
4276
- 0,
4277
  0
4278
  ],
4279
  "shape": [
4280
- 768,
4281
- 1152
4282
  ],
4283
  "filename_index": 0
4284
  },
4285
  {
4286
  "offsets": [
4287
- 0,
4288
- 1152
4289
  ],
4290
  "shape": [
4291
- 768,
4292
- 1152
4293
  ],
4294
  "filename_index": 1
4295
  }
4296
  ]
4297
  },
4298
- "h.3.ln_2.bias": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
  768
@@ -4322,37 +4322,42 @@
4322
  }
4323
  ]
4324
  },
4325
- "h.6.attn.c_attn.bias": {
4326
  "type": "Distributed",
4327
  "shape": [
4328
- 2304
 
4329
  ],
4330
  "dtype": "F32",
4331
  "chunks": [
4332
  {
4333
  "offsets": [
 
4334
  0
4335
  ],
4336
  "shape": [
4337
- 1152
 
4338
  ],
4339
  "filename_index": 0
4340
  },
4341
  {
4342
  "offsets": [
4343
- 1152
 
4344
  ],
4345
  "shape": [
4346
- 1152
 
4347
  ],
4348
  "filename_index": 1
4349
  }
4350
  ]
4351
  },
4352
- "h.2.attn.c_attn.bias": {
4353
  "type": "Distributed",
4354
  "shape": [
4355
- 2304
4356
  ],
4357
  "dtype": "F32",
4358
  "chunks": [
@@ -4361,26 +4366,26 @@
4361
  0
4362
  ],
4363
  "shape": [
4364
- 1152
4365
  ],
4366
  "filename_index": 0
4367
  },
4368
  {
4369
  "offsets": [
4370
- 1152
4371
  ],
4372
  "shape": [
4373
- 1152
4374
  ],
4375
  "filename_index": 1
4376
  }
4377
  ]
4378
  },
4379
- "h.10.mlp.c_proj.weight": {
4380
  "type": "Distributed",
4381
  "shape": [
4382
- 3072,
4383
- 768
4384
  ],
4385
  "dtype": "F32",
4386
  "chunks": [
@@ -4390,29 +4395,29 @@
4390
  0
4391
  ],
4392
  "shape": [
4393
- 1536,
4394
- 768
4395
  ],
4396
  "filename_index": 0
4397
  },
4398
  {
4399
  "offsets": [
4400
- 1536,
4401
- 0
4402
  ],
4403
  "shape": [
4404
- 1536,
4405
- 768
4406
  ],
4407
  "filename_index": 1
4408
  }
4409
  ]
4410
  },
4411
- "h.1.attn.c_attn.weight": {
4412
  "type": "Distributed",
4413
  "shape": [
4414
- 768,
4415
- 2304
4416
  ],
4417
  "dtype": "F32",
4418
  "chunks": [
@@ -4422,25 +4427,25 @@
4422
  0
4423
  ],
4424
  "shape": [
4425
- 768,
4426
- 1152
4427
  ],
4428
  "filename_index": 0
4429
  },
4430
  {
4431
  "offsets": [
4432
- 0,
4433
- 1152
4434
  ],
4435
  "shape": [
4436
- 768,
4437
- 1152
4438
  ],
4439
  "filename_index": 1
4440
  }
4441
  ]
4442
  },
4443
- "h.4.attn.c_attn.weight": {
4444
  "type": "Distributed",
4445
  "shape": [
4446
  768,
@@ -4472,7 +4477,7 @@
4472
  }
4473
  ]
4474
  },
4475
- "h.6.attn.c_proj.bias": {
4476
  "type": "Distributed",
4477
  "shape": [
4478
  768
@@ -4499,10 +4504,10 @@
4499
  }
4500
  ]
4501
  },
4502
- "h.0.attn.c_proj.weight": {
4503
  "type": "Distributed",
4504
  "shape": [
4505
- 768,
4506
  768
4507
  ],
4508
  "dtype": "F32",
@@ -4513,28 +4518,28 @@
4513
  0
4514
  ],
4515
  "shape": [
4516
- 384,
4517
  768
4518
  ],
4519
  "filename_index": 0
4520
  },
4521
  {
4522
  "offsets": [
4523
- 384,
4524
  0
4525
  ],
4526
  "shape": [
4527
- 384,
4528
  768
4529
  ],
4530
  "filename_index": 1
4531
  }
4532
  ]
4533
  },
4534
- "h.11.mlp.c_fc.bias": {
4535
  "type": "Distributed",
4536
  "shape": [
4537
- 3072
4538
  ],
4539
  "dtype": "F32",
4540
  "chunks": [
@@ -4543,155 +4548,150 @@
4543
  0
4544
  ],
4545
  "shape": [
4546
- 1536
4547
  ],
4548
  "filename_index": 0
4549
  },
4550
  {
4551
  "offsets": [
4552
- 1536
4553
  ],
4554
  "shape": [
4555
- 1536
4556
  ],
4557
  "filename_index": 1
4558
  }
4559
  ]
4560
  },
4561
- "h.0.attn.c_attn.weight": {
4562
  "type": "Distributed",
4563
  "shape": [
4564
- 768,
4565
- 2304
4566
  ],
4567
  "dtype": "F32",
4568
  "chunks": [
4569
  {
4570
  "offsets": [
4571
- 0,
4572
  0
4573
  ],
4574
  "shape": [
4575
- 768,
4576
- 1152
4577
  ],
4578
  "filename_index": 0
4579
  },
4580
  {
4581
  "offsets": [
4582
- 0,
4583
- 1152
4584
  ],
4585
  "shape": [
4586
- 768,
4587
- 1152
4588
  ],
4589
  "filename_index": 1
4590
  }
4591
  ]
4592
  },
4593
- "h.3.attn.bias": {
4594
  "type": "Distributed",
4595
  "shape": [
4596
- 1,
4597
- 1,
4598
- 1024,
4599
- 1024
4600
  ],
4601
  "dtype": "F32",
4602
  "chunks": [
4603
  {
4604
  "offsets": [
4605
- 0,
4606
- 0,
4607
  0,
4608
  0
4609
  ],
4610
  "shape": [
4611
- 1,
4612
- 1,
4613
- 1024,
4614
- 512
4615
  ],
4616
  "filename_index": 0
4617
  },
4618
  {
4619
  "offsets": [
4620
  0,
4621
- 0,
4622
- 0,
4623
- 512
4624
  ],
4625
  "shape": [
4626
- 1,
4627
- 1,
4628
- 1024,
4629
- 512
4630
  ],
4631
  "filename_index": 1
4632
  }
4633
  ]
4634
  },
4635
- "h.4.attn.c_proj.weight": {
4636
  "type": "Distributed",
4637
  "shape": [
4638
- 768,
4639
  768
4640
  ],
4641
  "dtype": "F32",
4642
  "chunks": [
4643
  {
4644
  "offsets": [
4645
- 0,
4646
  0
4647
  ],
4648
  "shape": [
4649
- 384,
4650
- 768
4651
  ],
4652
  "filename_index": 0
4653
  },
4654
  {
4655
  "offsets": [
4656
- 384,
4657
- 0
4658
  ],
4659
  "shape": [
4660
- 384,
4661
- 768
4662
  ],
4663
  "filename_index": 1
4664
  }
4665
  ]
4666
  },
4667
- "h.8.ln_2.weight": {
4668
  "type": "Distributed",
4669
  "shape": [
4670
- 768
 
 
 
4671
  ],
4672
  "dtype": "F32",
4673
  "chunks": [
4674
  {
4675
  "offsets": [
 
 
 
4676
  0
4677
  ],
4678
  "shape": [
4679
- 384
 
 
 
4680
  ],
4681
  "filename_index": 0
4682
  },
4683
  {
4684
  "offsets": [
4685
- 384
 
 
 
4686
  ],
4687
  "shape": [
4688
- 384
 
 
 
4689
  ],
4690
  "filename_index": 1
4691
  }
4692
  ]
4693
  },
4694
- "h.0.attn.c_proj.bias": {
4695
  "type": "Distributed",
4696
  "shape": [
4697
  768
@@ -4718,11 +4718,11 @@
4718
  }
4719
  ]
4720
  },
4721
- "h.4.mlp.c_proj.weight": {
4722
  "type": "Distributed",
4723
  "shape": [
4724
- 3072,
4725
- 768
4726
  ],
4727
  "dtype": "F32",
4728
  "chunks": [
@@ -4732,19 +4732,19 @@
4732
  0
4733
  ],
4734
  "shape": [
4735
- 1536,
4736
- 768
4737
  ],
4738
  "filename_index": 0
4739
  },
4740
  {
4741
  "offsets": [
4742
- 1536,
4743
- 0
4744
  ],
4745
  "shape": [
4746
- 1536,
4747
- 768
4748
  ],
4749
  "filename_index": 1
4750
  }
 
1
  {
2
  "tensors": {
3
+ "h.3.attn.c_proj.bias": {
4
  "type": "Distributed",
5
  "shape": [
 
6
  768
7
  ],
8
  "dtype": "F32",
9
  "chunks": [
10
  {
11
  "offsets": [
 
12
  0
13
  ],
14
  "shape": [
15
+ 384
 
16
  ],
17
  "filename_index": 0
18
  },
19
  {
20
  "offsets": [
21
+ 384
 
22
  ],
23
  "shape": [
24
+ 384
 
25
  ],
26
  "filename_index": 1
27
  }
28
  ]
29
  },
30
+ "h.2.mlp.c_fc.weight": {
31
  "type": "Distributed",
32
  "shape": [
33
  768,
 
59
  }
60
  ]
61
  },
62
+ "h.5.attn.bias": {
63
  "type": "Distributed",
64
  "shape": [
65
+ 1,
66
+ 1,
67
+ 1024,
68
+ 1024
69
  ],
70
  "dtype": "F32",
71
  "chunks": [
72
  {
73
  "offsets": [
74
+ 0,
75
+ 0,
76
+ 0,
77
  0
78
  ],
79
  "shape": [
80
+ 1,
81
+ 1,
82
+ 1024,
83
+ 512
84
  ],
85
  "filename_index": 0
86
  },
87
  {
88
  "offsets": [
89
+ 0,
90
+ 0,
91
+ 0,
92
+ 512
93
  ],
94
  "shape": [
95
+ 1,
96
+ 1,
97
+ 1024,
98
+ 512
99
  ],
100
  "filename_index": 1
101
  }
102
  ]
103
  },
104
+ "h.7.mlp.c_fc.bias": {
105
  "type": "Distributed",
106
  "shape": [
107
+ 3072
108
  ],
109
  "dtype": "F32",
110
  "chunks": [
 
113
  0
114
  ],
115
  "shape": [
116
+ 1536
117
  ],
118
  "filename_index": 0
119
  },
120
  {
121
  "offsets": [
122
+ 1536
123
  ],
124
  "shape": [
125
+ 1536
126
  ],
127
  "filename_index": 1
128
  }
129
  ]
130
  },
131
+ "h.0.attn.c_attn.weight": {
132
  "type": "Distributed",
133
  "shape": [
134
  768,
 
160
  }
161
  ]
162
  },
163
+ "h.9.ln_2.bias": {
164
  "type": "Distributed",
165
  "shape": [
166
  768
 
187
  }
188
  ]
189
  },
190
+ "h.5.mlp.c_proj.weight": {
191
  "type": "Distributed",
192
  "shape": [
193
+ 3072,
194
  768
195
  ],
196
  "dtype": "F32",
197
  "chunks": [
198
  {
199
  "offsets": [
200
+ 0,
201
  0
202
  ],
203
  "shape": [
204
+ 1536,
205
+ 768
206
  ],
207
  "filename_index": 0
208
  },
209
  {
210
  "offsets": [
211
+ 1536,
212
+ 0
213
  ],
214
  "shape": [
215
+ 1536,
216
+ 768
217
  ],
218
  "filename_index": 1
219
  }
220
  ]
221
  },
222
+ "h.8.mlp.c_fc.weight": {
223
  "type": "Distributed",
224
  "shape": [
225
+ 768,
226
+ 3072
227
  ],
228
  "dtype": "F32",
229
  "chunks": [
230
  {
231
  "offsets": [
232
+ 0,
233
  0
234
  ],
235
  "shape": [
236
+ 768,
237
+ 1536
238
  ],
239
  "filename_index": 0
240
  },
241
  {
242
  "offsets": [
243
+ 0,
244
+ 1536
245
  ],
246
  "shape": [
247
+ 768,
248
+ 1536
249
  ],
250
  "filename_index": 1
251
  }
252
  ]
253
  },
254
+ "wte.weight": {
255
  "type": "Distributed",
256
  "shape": [
257
+ 50257,
258
  768
259
  ],
260
  "dtype": "F32",
261
  "chunks": [
262
  {
263
  "offsets": [
264
+ 0,
265
  0
266
  ],
267
  "shape": [
268
+ 50257,
269
  384
270
  ],
271
  "filename_index": 0
272
  },
273
  {
274
  "offsets": [
275
+ 0,
276
  384
277
  ],
278
  "shape": [
279
+ 50257,
280
  384
281
  ],
282
  "filename_index": 1
283
  }
284
  ]
285
  },
286
+ "h.5.attn.c_proj.weight": {
287
  "type": "Distributed",
288
  "shape": [
289
+ 768,
290
  768
291
  ],
292
  "dtype": "F32",
 
297
  0
298
  ],
299
  "shape": [
300
+ 384,
301
  768
302
  ],
303
  "filename_index": 0
304
  },
305
  {
306
  "offsets": [
307
+ 384,
308
  0
309
  ],
310
  "shape": [
311
+ 384,
312
  768
313
  ],
314
  "filename_index": 1
315
  }
316
  ]
317
  },
318
+ "h.1.ln_1.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  "type": "Distributed",
320
  "shape": [
321
  768
 
342
  }
343
  ]
344
  },
345
+ "h.10.mlp.c_fc.bias": {
346
  "type": "Distributed",
347
  "shape": [
348
+ 3072
349
  ],
350
  "dtype": "F32",
351
  "chunks": [
 
354
  0
355
  ],
356
  "shape": [
357
+ 1536
358
  ],
359
  "filename_index": 0
360
  },
361
  {
362
  "offsets": [
363
+ 1536
364
  ],
365
  "shape": [
366
+ 1536
367
  ],
368
  "filename_index": 1
369
  }
370
  ]
371
  },
372
+ "h.9.mlp.c_proj.weight": {
373
  "type": "Distributed",
374
  "shape": [
375
+ 3072,
376
  768
377
  ],
378
  "dtype": "F32",
379
  "chunks": [
380
  {
381
  "offsets": [
382
+ 0,
383
  0
384
  ],
385
  "shape": [
386
+ 1536,
387
+ 768
388
  ],
389
  "filename_index": 0
390
  },
391
  {
392
  "offsets": [
393
+ 1536,
394
+ 0
395
  ],
396
  "shape": [
397
+ 1536,
398
+ 768
399
  ],
400
  "filename_index": 1
401
  }
402
  ]
403
  },
404
+ "h.2.attn.c_proj.weight": {
405
  "type": "Distributed",
406
  "shape": [
407
+ 768,
408
  768
409
  ],
410
  "dtype": "F32",
 
415
  0
416
  ],
417
  "shape": [
418
+ 384,
419
  768
420
  ],
421
  "filename_index": 0
422
  },
423
  {
424
  "offsets": [
425
+ 384,
426
  0
427
  ],
428
  "shape": [
429
+ 384,
430
  768
431
  ],
432
  "filename_index": 1
433
  }
434
  ]
435
  },
436
+ "h.10.attn.c_attn.weight": {
437
  "type": "Distributed",
438
  "shape": [
439
+ 768,
440
+ 2304
441
  ],
442
  "dtype": "F32",
443
  "chunks": [
444
  {
445
  "offsets": [
446
+ 0,
447
  0
448
  ],
449
  "shape": [
450
+ 768,
451
+ 1152
452
  ],
453
  "filename_index": 0
454
  },
455
  {
456
  "offsets": [
457
+ 0,
458
+ 1152
459
  ],
460
  "shape": [
461
+ 768,
462
+ 1152
463
  ],
464
  "filename_index": 1
465
  }
466
  ]
467
  },
468
+ "h.4.mlp.c_proj.weight": {
469
  "type": "Distributed",
470
  "shape": [
471
+ 3072,
472
+ 768
473
  ],
474
  "dtype": "F32",
475
  "chunks": [
476
  {
477
  "offsets": [
478
+ 0,
479
  0
480
  ],
481
  "shape": [
482
+ 1536,
483
+ 768
484
  ],
485
  "filename_index": 0
486
  },
487
  {
488
  "offsets": [
489
+ 1536,
490
+ 0
491
  ],
492
  "shape": [
493
+ 1536,
494
+ 768
495
  ],
496
  "filename_index": 1
497
  }
498
  ]
499
  },
500
+ "h.11.ln_1.weight": {
501
  "type": "Distributed",
502
  "shape": [
503
+ 768
504
  ],
505
  "dtype": "F32",
506
  "chunks": [
 
509
  0
510
  ],
511
  "shape": [
512
+ 384
513
  ],
514
  "filename_index": 0
515
  },
516
  {
517
  "offsets": [
518
+ 384
519
  ],
520
  "shape": [
521
+ 384
522
  ],
523
  "filename_index": 1
524
  }
525
  ]
526
  },
527
+ "h.4.ln_2.bias": {
528
  "type": "Distributed",
529
  "shape": [
530
+ 768
 
531
  ],
532
  "dtype": "F32",
533
  "chunks": [
534
  {
535
  "offsets": [
 
536
  0
537
  ],
538
  "shape": [
539
+ 384
 
540
  ],
541
  "filename_index": 0
542
  },
543
  {
544
  "offsets": [
545
+ 384
 
546
  ],
547
  "shape": [
548
+ 384
 
549
  ],
550
  "filename_index": 1
551
  }
552
  ]
553
  },
554
+ "h.8.attn.bias": {
555
  "type": "Distributed",
556
  "shape": [
557
+ 1,
558
+ 1,
559
+ 1024,
560
+ 1024
561
  ],
562
  "dtype": "F32",
563
  "chunks": [
564
  {
565
  "offsets": [
566
+ 0,
567
+ 0,
568
+ 0,
569
  0
570
  ],
571
  "shape": [
572
+ 1,
573
+ 1,
574
+ 1024,
575
+ 512
576
  ],
577
  "filename_index": 0
578
  },
579
  {
580
  "offsets": [
581
+ 0,
582
+ 0,
583
+ 0,
584
+ 512
585
  ],
586
  "shape": [
587
+ 1,
588
+ 1,
589
+ 1024,
590
+ 512
591
  ],
592
  "filename_index": 1
593
  }
594
  ]
595
  },
596
+ "h.10.mlp.c_proj.bias": {
597
  "type": "Distributed",
598
  "shape": [
599
  768
 
620
  }
621
  ]
622
  },
623
+ "h.4.attn.bias": {
624
  "type": "Distributed",
625
  "shape": [
626
+ 1,
627
+ 1,
628
+ 1024,
629
+ 1024
630
  ],
631
  "dtype": "F32",
632
  "chunks": [
633
  {
634
  "offsets": [
635
  0,
636
+ 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
  0,
638
  0
639
  ],
640
  "shape": [
641
+ 1,
642
+ 1,
643
+ 1024,
644
+ 512
645
  ],
646
  "filename_index": 0
647
  },
648
  {
649
  "offsets": [
650
  0,
651
+ 0,
652
+ 0,
653
+ 512
654
  ],
655
  "shape": [
656
+ 1,
657
+ 1,
658
+ 1024,
659
+ 512
660
  ],
661
  "filename_index": 1
662
  }
663
  ]
664
  },
665
+ "h.2.ln_1.bias": {
666
  "type": "Distributed",
667
  "shape": [
668
+ 768
 
669
  ],
670
  "dtype": "F32",
671
  "chunks": [
672
  {
673
  "offsets": [
 
674
  0
675
  ],
676
  "shape": [
677
+ 384
 
678
  ],
679
  "filename_index": 0
680
  },
681
  {
682
  "offsets": [
683
+ 384
 
684
  ],
685
  "shape": [
686
+ 384
 
687
  ],
688
  "filename_index": 1
689
  }
690
  ]
691
  },
692
+ "h.8.attn.c_proj.bias": {
693
  "type": "Distributed",
694
  "shape": [
695
  768
 
716
  }
717
  ]
718
  },
719
+ "h.0.mlp.c_proj.weight": {
720
  "type": "Distributed",
721
  "shape": [
722
+ 3072,
723
+ 768
724
  ],
725
  "dtype": "F32",
726
  "chunks": [
 
730
  0
731
  ],
732
  "shape": [
733
+ 1536,
734
+ 768
735
  ],
736
  "filename_index": 0
737
  },
738
  {
739
  "offsets": [
740
+ 1536,
741
+ 0
742
  ],
743
  "shape": [
744
+ 1536,
745
+ 768
746
  ],
747
  "filename_index": 1
748
  }
749
  ]
750
  },
751
+ "h.5.ln_1.bias": {
752
  "type": "Distributed",
753
  "shape": [
754
  768
 
775
  }
776
  ]
777
  },
778
+ "h.7.attn.c_proj.weight": {
779
  "type": "Distributed",
780
  "shape": [
781
+ 768,
782
+ 768
783
  ],
784
  "dtype": "F32",
785
  "chunks": [
786
  {
787
  "offsets": [
788
+ 0,
789
  0
790
  ],
791
  "shape": [
792
+ 384,
793
+ 768
794
  ],
795
  "filename_index": 0
796
  },
797
  {
798
  "offsets": [
799
+ 384,
800
+ 0
801
  ],
802
  "shape": [
803
+ 384,
804
+ 768
805
  ],
806
  "filename_index": 1
807
  }
808
  ]
809
  },
810
+ "h.11.attn.bias": {
811
  "type": "Distributed",
812
  "shape": [
813
+ 1,
814
+ 1,
815
+ 1024,
816
+ 1024
817
  ],
818
  "dtype": "F32",
819
  "chunks": [
820
  {
821
  "offsets": [
822
+ 0,
823
+ 0,
824
  0,
825
  0
826
  ],
827
  "shape": [
828
+ 1,
829
+ 1,
830
+ 1024,
831
+ 512
832
  ],
833
  "filename_index": 0
834
  },
835
  {
836
  "offsets": [
837
+ 0,
838
+ 0,
839
+ 0,
840
+ 512
841
  ],
842
  "shape": [
843
+ 1,
844
+ 1,
845
+ 1024,
846
+ 512
847
  ],
848
  "filename_index": 1
849
  }
850
  ]
851
  },
852
+ "h.9.attn.c_proj.bias": {
853
  "type": "Distributed",
854
  "shape": [
855
  768
 
876
  }
877
  ]
878
  },
879
+ "h.5.mlp.c_fc.weight": {
880
  "type": "Distributed",
881
  "shape": [
882
  768,
883
+ 3072
884
  ],
885
  "dtype": "F32",
886
  "chunks": [
 
890
  0
891
  ],
892
  "shape": [
893
+ 768,
894
+ 1536
895
  ],
896
  "filename_index": 0
897
  },
898
  {
899
  "offsets": [
900
+ 0,
901
+ 1536
902
  ],
903
  "shape": [
904
+ 768,
905
+ 1536
906
  ],
907
  "filename_index": 1
908
  }
909
  ]
910
  },
911
+ "h.0.ln_2.weight": {
912
  "type": "Distributed",
913
  "shape": [
 
914
  768
915
  ],
916
  "dtype": "F32",
917
  "chunks": [
918
  {
919
  "offsets": [
 
920
  0
921
  ],
922
  "shape": [
 
923
  384
924
  ],
925
  "filename_index": 0
926
  },
927
  {
928
  "offsets": [
 
929
  384
930
  ],
931
  "shape": [
 
932
  384
933
  ],
934
  "filename_index": 1
935
  }
936
  ]
937
  },
938
+ "h.4.attn.c_attn.bias": {
939
  "type": "Distributed",
940
  "shape": [
941
+ 2304
942
  ],
943
  "dtype": "F32",
944
  "chunks": [
 
947
  0
948
  ],
949
  "shape": [
950
+ 1152
951
  ],
952
  "filename_index": 0
953
  },
954
  {
955
  "offsets": [
956
+ 1152
957
  ],
958
  "shape": [
959
+ 1152
960
  ],
961
  "filename_index": 1
962
  }
963
  ]
964
  },
965
+ "h.1.mlp.c_proj.weight": {
966
  "type": "Distributed",
967
  "shape": [
968
  3072,
 
994
  }
995
  ]
996
  },
997
+ "h.7.attn.c_proj.bias": {
998
  "type": "Distributed",
999
  "shape": [
1000
+ 768
 
1001
  ],
1002
  "dtype": "F32",
1003
  "chunks": [
1004
  {
1005
  "offsets": [
 
1006
  0
1007
  ],
1008
  "shape": [
1009
+ 384
 
1010
  ],
1011
  "filename_index": 0
1012
  },
1013
  {
1014
  "offsets": [
1015
+ 384
 
1016
  ],
1017
  "shape": [
1018
+ 384
 
1019
  ],
1020
  "filename_index": 1
1021
  }
1022
  ]
1023
  },
1024
+ "h.11.attn.c_proj.bias": {
1025
  "type": "Distributed",
1026
  "shape": [
1027
+ 768
 
 
 
1028
  ],
1029
  "dtype": "F32",
1030
  "chunks": [
1031
  {
1032
  "offsets": [
 
 
 
1033
  0
1034
  ],
1035
  "shape": [
1036
+ 384
 
 
 
1037
  ],
1038
  "filename_index": 0
1039
  },
1040
  {
1041
  "offsets": [
1042
+ 384
 
 
 
1043
  ],
1044
  "shape": [
1045
+ 384
 
 
 
1046
  ],
1047
  "filename_index": 1
1048
  }
1049
  ]
1050
  },
1051
+ "h.4.ln_2.weight": {
1052
  "type": "Distributed",
1053
  "shape": [
1054
  768
 
1075
  }
1076
  ]
1077
  },
1078
+ "h.1.attn.c_proj.bias": {
1079
  "type": "Distributed",
1080
  "shape": [
1081
  768
 
1102
  }
1103
  ]
1104
  },
1105
+ "h.6.mlp.c_fc.bias": {
1106
  "type": "Distributed",
1107
  "shape": [
1108
+ 3072
1109
  ],
1110
  "dtype": "F32",
1111
  "chunks": [
 
1114
  0
1115
  ],
1116
  "shape": [
1117
+ 1536
1118
  ],
1119
  "filename_index": 0
1120
  },
1121
  {
1122
  "offsets": [
1123
+ 1536
1124
  ],
1125
  "shape": [
1126
+ 1536
1127
  ],
1128
  "filename_index": 1
1129
  }
1130
  ]
1131
  },
1132
+ "h.11.attn.c_proj.weight": {
1133
  "type": "Distributed",
1134
  "shape": [
1135
  768,
 
1161
  }
1162
  ]
1163
  },
1164
+ "h.7.attn.c_attn.weight": {
1165
  "type": "Distributed",
1166
  "shape": [
1167
+ 768,
1168
+ 2304
1169
  ],
1170
  "dtype": "F32",
1171
  "chunks": [
1172
  {
1173
  "offsets": [
1174
+ 0,
1175
  0
1176
  ],
1177
  "shape": [
1178
+ 768,
1179
+ 1152
1180
  ],
1181
  "filename_index": 0
1182
  },
1183
  {
1184
  "offsets": [
1185
+ 0,
1186
+ 1152
1187
  ],
1188
  "shape": [
1189
+ 768,
1190
+ 1152
1191
  ],
1192
  "filename_index": 1
1193
  }
1194
  ]
1195
  },
1196
+ "h.7.ln_2.bias": {
1197
  "type": "Distributed",
1198
  "shape": [
 
1199
  768
1200
  ],
1201
  "dtype": "F32",
1202
  "chunks": [
1203
  {
1204
  "offsets": [
 
1205
  0
1206
  ],
1207
  "shape": [
1208
+ 384
 
1209
  ],
1210
  "filename_index": 0
1211
  },
1212
  {
1213
  "offsets": [
1214
+ 384
 
1215
  ],
1216
  "shape": [
1217
+ 384
 
1218
  ],
1219
  "filename_index": 1
1220
  }
1221
  ]
1222
  },
1223
+ "h.6.attn.c_attn.bias": {
1224
  "type": "Distributed",
1225
  "shape": [
1226
+ 2304
1227
  ],
1228
  "dtype": "F32",
1229
  "chunks": [
 
1232
  0
1233
  ],
1234
  "shape": [
1235
+ 1152
1236
  ],
1237
  "filename_index": 0
1238
  },
1239
  {
1240
  "offsets": [
1241
+ 1152
1242
  ],
1243
  "shape": [
1244
+ 1152
1245
  ],
1246
  "filename_index": 1
1247
  }
1248
  ]
1249
  },
1250
+ "h.3.mlp.c_proj.bias": {
1251
  "type": "Distributed",
1252
  "shape": [
1253
+ 768
 
1254
  ],
1255
  "dtype": "F32",
1256
  "chunks": [
1257
  {
1258
  "offsets": [
 
1259
  0
1260
  ],
1261
  "shape": [
1262
+ 384
 
1263
  ],
1264
  "filename_index": 0
1265
  },
1266
  {
1267
  "offsets": [
1268
+ 384
 
1269
  ],
1270
  "shape": [
1271
+ 384
 
1272
  ],
1273
  "filename_index": 1
1274
  }
1275
  ]
1276
  },
1277
+ "h.0.attn.c_proj.bias": {
1278
  "type": "Distributed",
1279
  "shape": [
1280
+ 768
 
1281
  ],
1282
  "dtype": "F32",
1283
  "chunks": [
1284
  {
1285
  "offsets": [
 
1286
  0
1287
  ],
1288
  "shape": [
1289
+ 384
 
1290
  ],
1291
  "filename_index": 0
1292
  },
1293
  {
1294
  "offsets": [
1295
+ 384
 
1296
  ],
1297
  "shape": [
1298
+ 384
 
1299
  ],
1300
  "filename_index": 1
1301
  }
1302
  ]
1303
  },
1304
+ "h.2.ln_2.bias": {
1305
  "type": "Distributed",
1306
  "shape": [
1307
+ 768
 
1308
  ],
1309
  "dtype": "F32",
1310
  "chunks": [
1311
  {
1312
  "offsets": [
 
1313
  0
1314
  ],
1315
  "shape": [
1316
+ 384
 
1317
  ],
1318
  "filename_index": 0
1319
  },
1320
  {
1321
  "offsets": [
1322
+ 384
 
1323
  ],
1324
  "shape": [
1325
+ 384
 
1326
  ],
1327
  "filename_index": 1
1328
  }
1329
  ]
1330
  },
1331
+ "h.9.ln_2.weight": {
1332
  "type": "Distributed",
1333
  "shape": [
1334
  768
 
1355
  }
1356
  ]
1357
  },
1358
+ "h.0.ln_1.bias": {
1359
  "type": "Distributed",
1360
  "shape": [
1361
  768
 
1382
  }
1383
  ]
1384
  },
1385
+ "h.2.ln_1.weight": {
1386
  "type": "Distributed",
1387
  "shape": [
1388
+ 768
1389
  ],
1390
  "dtype": "F32",
1391
  "chunks": [
 
1394
  0
1395
  ],
1396
  "shape": [
1397
+ 384
1398
  ],
1399
  "filename_index": 0
1400
  },
1401
  {
1402
  "offsets": [
1403
+ 384
1404
  ],
1405
  "shape": [
1406
+ 384
1407
  ],
1408
  "filename_index": 1
1409
  }
1410
  ]
1411
  },
1412
+ "h.0.attn.c_attn.bias": {
1413
  "type": "Distributed",
1414
  "shape": [
1415
  2304
 
1436
  }
1437
  ]
1438
  },
1439
+ "h.6.attn.c_proj.bias": {
1440
  "type": "Distributed",
1441
  "shape": [
 
1442
  768
1443
  ],
1444
  "dtype": "F32",
1445
  "chunks": [
1446
  {
1447
  "offsets": [
 
1448
  0
1449
  ],
1450
  "shape": [
 
1451
  384
1452
  ],
1453
  "filename_index": 0
1454
  },
1455
  {
1456
  "offsets": [
 
1457
  384
1458
  ],
1459
  "shape": [
 
1460
  384
1461
  ],
1462
  "filename_index": 1
1463
  }
1464
  ]
1465
  },
1466
+ "h.2.mlp.c_fc.bias": {
1467
  "type": "Distributed",
1468
  "shape": [
 
1469
  3072
1470
  ],
1471
  "dtype": "F32",
1472
  "chunks": [
1473
  {
1474
  "offsets": [
 
1475
  0
1476
  ],
1477
  "shape": [
 
1478
  1536
1479
  ],
1480
  "filename_index": 0
1481
  },
1482
  {
1483
  "offsets": [
 
1484
  1536
1485
  ],
1486
  "shape": [
 
1487
  1536
1488
  ],
1489
  "filename_index": 1
1490
  }
1491
  ]
1492
  },
1493
+ "h.9.ln_1.weight": {
1494
  "type": "Distributed",
1495
  "shape": [
1496
  768
 
1517
  }
1518
  ]
1519
  },
1520
+ "h.8.attn.c_attn.bias": {
1521
  "type": "Distributed",
1522
  "shape": [
1523
+ 2304
1524
  ],
1525
  "dtype": "F32",
1526
  "chunks": [
 
1529
  0
1530
  ],
1531
  "shape": [
1532
+ 1152
1533
  ],
1534
  "filename_index": 0
1535
  },
1536
  {
1537
  "offsets": [
1538
+ 1152
1539
  ],
1540
  "shape": [
1541
+ 1152
1542
  ],
1543
  "filename_index": 1
1544
  }
1545
  ]
1546
  },
1547
+ "h.4.ln_1.bias": {
1548
  "type": "Distributed",
1549
  "shape": [
1550
  768
 
1571
  }
1572
  ]
1573
  },
1574
+ "h.1.ln_2.bias": {
1575
  "type": "Distributed",
1576
  "shape": [
1577
  768
 
1598
  }
1599
  ]
1600
  },
1601
+ "h.7.mlp.c_fc.weight": {
1602
  "type": "Distributed",
1603
  "shape": [
1604
+ 768,
1605
+ 3072
1606
  ],
1607
  "dtype": "F32",
1608
  "chunks": [
1609
  {
1610
  "offsets": [
1611
+ 0,
1612
  0
1613
  ],
1614
  "shape": [
1615
+ 768,
1616
+ 1536
1617
  ],
1618
  "filename_index": 0
1619
  },
1620
  {
1621
  "offsets": [
1622
+ 0,
1623
+ 1536
1624
  ],
1625
  "shape": [
1626
+ 768,
1627
+ 1536
1628
  ],
1629
  "filename_index": 1
1630
  }
1631
  ]
1632
  },
1633
+ "h.7.ln_2.weight": {
1634
  "type": "Distributed",
1635
  "shape": [
 
1636
  768
1637
  ],
1638
  "dtype": "F32",
1639
  "chunks": [
1640
  {
1641
  "offsets": [
 
1642
  0
1643
  ],
1644
  "shape": [
1645
+ 384
 
1646
  ],
1647
  "filename_index": 0
1648
  },
1649
  {
1650
  "offsets": [
1651
+ 384
 
1652
  ],
1653
  "shape": [
1654
+ 384
 
1655
  ],
1656
  "filename_index": 1
1657
  }
1658
  ]
1659
  },
1660
+ "h.6.ln_1.bias": {
1661
  "type": "Distributed",
1662
  "shape": [
1663
+ 768
1664
  ],
1665
  "dtype": "F32",
1666
  "chunks": [
 
1669
  0
1670
  ],
1671
  "shape": [
1672
+ 384
1673
  ],
1674
  "filename_index": 0
1675
  },
1676
  {
1677
  "offsets": [
1678
+ 384
1679
  ],
1680
  "shape": [
1681
+ 384
1682
  ],
1683
  "filename_index": 1
1684
  }
1685
  ]
1686
  },
1687
+ "h.5.mlp.c_fc.bias": {
1688
  "type": "Distributed",
1689
  "shape": [
1690
  3072
 
1711
  }
1712
  ]
1713
  },
1714
+ "h.1.attn.c_proj.weight": {
1715
+ "type": "Distributed",
1716
+ "shape": [
1717
+ 768,
1718
+ 768
1719
+ ],
1720
+ "dtype": "F32",
1721
+ "chunks": [
1722
+ {
1723
+ "offsets": [
1724
+ 0,
1725
+ 0
1726
+ ],
1727
+ "shape": [
1728
+ 384,
1729
+ 768
1730
+ ],
1731
+ "filename_index": 0
1732
+ },
1733
+ {
1734
+ "offsets": [
1735
+ 384,
1736
+ 0
1737
+ ],
1738
+ "shape": [
1739
+ 384,
1740
+ 768
1741
+ ],
1742
+ "filename_index": 1
1743
+ }
1744
+ ]
1745
+ },
1746
  "h.6.mlp.c_proj.bias": {
1747
  "type": "Distributed",
1748
  "shape": [
 
1770
  }
1771
  ]
1772
  },
1773
+ "h.7.attn.c_attn.bias": {
1774
  "type": "Distributed",
1775
  "shape": [
1776
+ 2304
 
1777
  ],
1778
  "dtype": "F32",
1779
  "chunks": [
1780
  {
1781
  "offsets": [
 
1782
  0
1783
  ],
1784
  "shape": [
1785
+ 1152
 
1786
  ],
1787
  "filename_index": 0
1788
  },
1789
  {
1790
  "offsets": [
1791
+ 1152
 
1792
  ],
1793
  "shape": [
1794
+ 1152
 
1795
  ],
1796
  "filename_index": 1
1797
  }
1798
  ]
1799
  },
1800
+ "h.10.ln_1.bias": {
1801
  "type": "Distributed",
1802
  "shape": [
1803
  768
 
1824
  }
1825
  ]
1826
  },
1827
+ "h.9.attn.c_proj.weight": {
1828
  "type": "Distributed",
1829
  "shape": [
1830
  768,
 
1856
  }
1857
  ]
1858
  },
1859
+ "h.2.attn.c_attn.bias": {
1860
  "type": "Distributed",
1861
  "shape": [
1862
+ 2304
1863
  ],
1864
  "dtype": "F32",
1865
  "chunks": [
 
1868
  0
1869
  ],
1870
  "shape": [
1871
+ 1152
1872
  ],
1873
  "filename_index": 0
1874
  },
1875
  {
1876
  "offsets": [
1877
+ 1152
1878
  ],
1879
  "shape": [
1880
+ 1152
1881
  ],
1882
  "filename_index": 1
1883
  }
1884
  ]
1885
  },
1886
+ "h.4.attn.c_proj.weight": {
1887
  "type": "Distributed",
1888
  "shape": [
1889
+ 768,
1890
  768
1891
  ],
1892
  "dtype": "F32",
1893
  "chunks": [
1894
  {
1895
  "offsets": [
1896
+ 0,
1897
  0
1898
  ],
1899
  "shape": [
1900
+ 384,
1901
+ 768
1902
  ],
1903
  "filename_index": 0
1904
  },
1905
  {
1906
  "offsets": [
1907
+ 384,
1908
+ 0
1909
  ],
1910
  "shape": [
1911
+ 384,
1912
+ 768
1913
  ],
1914
  "filename_index": 1
1915
  }
1916
  ]
1917
  },
1918
+ "h.7.mlp.c_proj.weight": {
1919
  "type": "Distributed",
1920
  "shape": [
1921
+ 3072,
1922
  768
1923
  ],
1924
  "dtype": "F32",
1925
  "chunks": [
1926
  {
1927
  "offsets": [
1928
+ 0,
1929
  0
1930
  ],
1931
  "shape": [
1932
+ 1536,
1933
+ 768
1934
  ],
1935
  "filename_index": 0
1936
  },
1937
  {
1938
  "offsets": [
1939
+ 1536,
1940
+ 0
1941
  ],
1942
  "shape": [
1943
+ 1536,
1944
+ 768
1945
  ],
1946
  "filename_index": 1
1947
  }
1948
  ]
1949
  },
1950
+ "h.0.ln_1.weight": {
1951
  "type": "Distributed",
1952
  "shape": [
1953
  768
 
1974
  }
1975
  ]
1976
  },
1977
+ "h.7.ln_1.bias": {
1978
  "type": "Distributed",
1979
  "shape": [
1980
  768
 
2001
  }
2002
  ]
2003
  },
2004
+ "h.1.ln_2.weight": {
2005
  "type": "Distributed",
2006
  "shape": [
2007
+ 768
2008
  ],
2009
  "dtype": "F32",
2010
  "chunks": [
 
2013
  0
2014
  ],
2015
  "shape": [
2016
+ 384
2017
  ],
2018
  "filename_index": 0
2019
  },
2020
  {
2021
  "offsets": [
2022
+ 384
2023
  ],
2024
  "shape": [
2025
+ 384
2026
  ],
2027
  "filename_index": 1
2028
  }
2029
  ]
2030
  },
2031
+ "h.3.mlp.c_fc.bias": {
2032
  "type": "Distributed",
2033
  "shape": [
2034
+ 3072
2035
  ],
2036
  "dtype": "F32",
2037
  "chunks": [
 
2040
  0
2041
  ],
2042
  "shape": [
2043
+ 1536
2044
  ],
2045
  "filename_index": 0
2046
  },
2047
  {
2048
  "offsets": [
2049
+ 1536
2050
  ],
2051
  "shape": [
2052
+ 1536
2053
  ],
2054
  "filename_index": 1
2055
  }
2056
  ]
2057
  },
2058
+ "h.4.attn.c_attn.weight": {
2059
  "type": "Distributed",
2060
  "shape": [
2061
+ 768,
2062
+ 2304
2063
  ],
2064
  "dtype": "F32",
2065
  "chunks": [
2066
  {
2067
  "offsets": [
2068
+ 0,
2069
  0
2070
  ],
2071
  "shape": [
2072
+ 768,
2073
+ 1152
2074
  ],
2075
  "filename_index": 0
2076
  },
2077
  {
2078
  "offsets": [
2079
+ 0,
2080
+ 1152
2081
+ ],
2082
  "shape": [
2083
+ 768,
2084
+ 1152
2085
  ],
2086
  "filename_index": 1
2087
  }
2088
  ]
2089
  },
2090
+ "h.3.mlp.c_proj.weight": {
2091
  "type": "Distributed",
2092
  "shape": [
2093
+ 3072,
2094
  768
2095
  ],
2096
  "dtype": "F32",
2097
  "chunks": [
2098
  {
2099
  "offsets": [
2100
+ 0,
2101
  0
2102
  ],
2103
  "shape": [
2104
+ 1536,
2105
+ 768
2106
  ],
2107
  "filename_index": 0
2108
  },
2109
  {
2110
  "offsets": [
2111
+ 1536,
2112
+ 0
2113
  ],
2114
  "shape": [
2115
+ 1536,
2116
+ 768
2117
  ],
2118
  "filename_index": 1
2119
  }
2120
  ]
2121
  },
2122
+ "h.2.attn.c_attn.weight": {
2123
  "type": "Distributed",
2124
  "shape": [
2125
+ 768,
2126
+ 2304
2127
  ],
2128
  "dtype": "F32",
2129
  "chunks": [
2130
  {
2131
  "offsets": [
2132
+ 0,
2133
  0
2134
  ],
2135
  "shape": [
2136
+ 768,
2137
+ 1152
2138
  ],
2139
  "filename_index": 0
2140
  },
2141
  {
2142
  "offsets": [
2143
+ 0,
2144
+ 1152
2145
  ],
2146
  "shape": [
2147
+ 768,
2148
+ 1152
2149
  ],
2150
  "filename_index": 1
2151
  }
2152
  ]
2153
  },
2154
+ "h.3.ln_1.weight": {
2155
  "type": "Distributed",
2156
  "shape": [
2157
  768
 
2178
  }
2179
  ]
2180
  },
2181
+ "h.9.attn.bias": {
2182
  "type": "Distributed",
2183
  "shape": [
2184
+ 1,
2185
+ 1,
2186
+ 1024,
2187
+ 1024
2188
  ],
2189
  "dtype": "F32",
2190
  "chunks": [
2191
  {
2192
  "offsets": [
2193
+ 0,
2194
+ 0,
2195
+ 0,
2196
  0
2197
  ],
2198
  "shape": [
2199
+ 1,
2200
+ 1,
2201
+ 1024,
2202
+ 512
2203
  ],
2204
  "filename_index": 0
2205
  },
2206
  {
2207
  "offsets": [
2208
+ 0,
2209
+ 0,
2210
+ 0,
2211
+ 512
2212
  ],
2213
  "shape": [
2214
+ 1,
2215
+ 1,
2216
+ 1024,
2217
+ 512
2218
  ],
2219
  "filename_index": 1
2220
  }
2221
  ]
2222
  },
2223
+ "h.6.ln_2.weight": {
2224
  "type": "Distributed",
2225
  "shape": [
 
2226
  768
2227
  ],
2228
  "dtype": "F32",
2229
  "chunks": [
2230
  {
2231
  "offsets": [
 
2232
  0
2233
  ],
2234
  "shape": [
2235
+ 384
 
2236
  ],
2237
  "filename_index": 0
2238
  },
2239
  {
2240
  "offsets": [
2241
+ 384
 
2242
  ],
2243
  "shape": [
2244
+ 384
 
2245
  ],
2246
  "filename_index": 1
2247
  }
2248
  ]
2249
  },
2250
+ "h.11.ln_1.bias": {
2251
  "type": "Distributed",
2252
  "shape": [
2253
  768
 
2274
  }
2275
  ]
2276
  },
2277
+ "h.11.ln_2.bias": {
2278
  "type": "Distributed",
2279
  "shape": [
2280
  768
 
2301
  }
2302
  ]
2303
  },
2304
+ "h.8.ln_1.bias": {
2305
  "type": "Distributed",
2306
  "shape": [
2307
  768
 
2328
  }
2329
  ]
2330
  },
2331
+ "h.4.mlp.c_proj.bias": {
2332
  "type": "Distributed",
2333
  "shape": [
2334
  768
 
2355
  }
2356
  ]
2357
  },
2358
+ "h.11.attn.c_attn.bias": {
2359
  "type": "Distributed",
2360
  "shape": [
 
2361
  2304
2362
  ],
2363
  "dtype": "F32",
2364
  "chunks": [
2365
  {
2366
  "offsets": [
 
2367
  0
2368
  ],
2369
  "shape": [
 
2370
  1152
2371
  ],
2372
  "filename_index": 0
2373
  },
2374
  {
2375
  "offsets": [
 
2376
  1152
2377
  ],
2378
  "shape": [
 
2379
  1152
2380
  ],
2381
  "filename_index": 1
2382
  }
2383
  ]
2384
  },
2385
+ "h.6.ln_1.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2386
  "type": "Distributed",
2387
  "shape": [
 
2388
  768
2389
  ],
2390
  "dtype": "F32",
2391
  "chunks": [
2392
  {
2393
  "offsets": [
 
2394
  0
2395
  ],
2396
  "shape": [
2397
+ 384
 
2398
  ],
2399
  "filename_index": 0
2400
  },
2401
  {
2402
  "offsets": [
2403
+ 384
 
2404
  ],
2405
  "shape": [
2406
+ 384
 
2407
  ],
2408
  "filename_index": 1
2409
  }
2410
  ]
2411
  },
2412
+ "ln_f.weight": {
2413
  "type": "Distributed",
2414
  "shape": [
2415
+ 768
 
2416
  ],
2417
  "dtype": "F32",
2418
  "chunks": [
2419
  {
2420
  "offsets": [
 
2421
  0
2422
  ],
2423
  "shape": [
2424
+ 384
 
2425
  ],
2426
  "filename_index": 0
2427
  },
2428
  {
2429
  "offsets": [
2430
+ 384
 
2431
  ],
2432
  "shape": [
2433
+ 384
 
2434
  ],
2435
  "filename_index": 1
2436
  }
2437
  ]
2438
  },
2439
+ "ln_f.bias": {
2440
  "type": "Distributed",
2441
  "shape": [
2442
  768
 
2463
  }
2464
  ]
2465
  },
2466
+ "h.2.attn.bias": {
2467
  "type": "Distributed",
2468
  "shape": [
2469
+ 1,
2470
+ 1,
2471
+ 1024,
2472
+ 1024
2473
  ],
2474
  "dtype": "F32",
2475
  "chunks": [
2476
  {
2477
  "offsets": [
2478
+ 0,
2479
+ 0,
2480
+ 0,
2481
  0
2482
  ],
2483
  "shape": [
2484
+ 1,
2485
+ 1,
2486
+ 1024,
2487
+ 512
2488
  ],
2489
  "filename_index": 0
2490
  },
2491
  {
2492
  "offsets": [
2493
+ 0,
2494
+ 0,
2495
+ 0,
2496
+ 512
2497
  ],
2498
  "shape": [
2499
+ 1,
2500
+ 1,
2501
+ 1024,
2502
+ 512
2503
  ],
2504
  "filename_index": 1
2505
  }
2506
  ]
2507
  },
2508
+ "h.10.attn.c_attn.bias": {
2509
  "type": "Distributed",
2510
  "shape": [
2511
+ 2304
2512
  ],
2513
  "dtype": "F32",
2514
  "chunks": [
 
2517
  0
2518
  ],
2519
  "shape": [
2520
+ 1152
2521
  ],
2522
  "filename_index": 0
2523
  },
2524
  {
2525
  "offsets": [
2526
+ 1152
2527
  ],
2528
  "shape": [
2529
+ 1152
2530
  ],
2531
  "filename_index": 1
2532
  }
2533
  ]
2534
  },
2535
+ "h.11.mlp.c_fc.bias": {
2536
  "type": "Distributed",
2537
  "shape": [
2538
+ 3072
2539
  ],
2540
  "dtype": "F32",
2541
  "chunks": [
 
2544
  0
2545
  ],
2546
  "shape": [
2547
+ 1536
2548
  ],
2549
  "filename_index": 0
2550
  },
2551
  {
2552
  "offsets": [
2553
+ 1536
2554
  ],
2555
  "shape": [
2556
+ 1536
2557
  ],
2558
  "filename_index": 1
2559
  }
2560
  ]
2561
  },
2562
+ "h.3.mlp.c_fc.weight": {
2563
  "type": "Distributed",
2564
  "shape": [
2565
+ 768,
2566
+ 3072
2567
  ],
2568
  "dtype": "F32",
2569
  "chunks": [
2570
  {
2571
  "offsets": [
2572
+ 0,
2573
  0
2574
  ],
2575
  "shape": [
2576
+ 768,
2577
+ 1536
2578
  ],
2579
  "filename_index": 0
2580
  },
2581
  {
2582
  "offsets": [
2583
+ 0,
2584
+ 1536
2585
  ],
2586
  "shape": [
2587
+ 768,
2588
+ 1536
2589
  ],
2590
  "filename_index": 1
2591
  }
2592
  ]
2593
  },
2594
+ "h.7.attn.bias": {
2595
  "type": "Distributed",
2596
  "shape": [
2597
  1,
 
2633
  }
2634
  ]
2635
  },
2636
+ "h.0.mlp.c_fc.weight": {
2637
  "type": "Distributed",
2638
  "shape": [
2639
+ 768,
2640
+ 3072
2641
  ],
2642
  "dtype": "F32",
2643
  "chunks": [
2644
  {
2645
  "offsets": [
2646
+ 0,
2647
  0
2648
  ],
2649
  "shape": [
2650
+ 768,
2651
+ 1536
2652
  ],
2653
  "filename_index": 0
2654
  },
2655
  {
2656
  "offsets": [
2657
+ 0,
2658
+ 1536
2659
  ],
2660
  "shape": [
2661
+ 768,
2662
+ 1536
2663
  ],
2664
  "filename_index": 1
2665
  }
2666
  ]
2667
  },
2668
+ "h.5.mlp.c_proj.bias": {
2669
  "type": "Distributed",
2670
  "shape": [
2671
  768
 
2692
  }
2693
  ]
2694
  },
2695
+ "h.6.attn.c_attn.weight": {
2696
  "type": "Distributed",
2697
  "shape": [
2698
  768,
2699
+ 2304
2700
  ],
2701
  "dtype": "F32",
2702
  "chunks": [
 
2707
  ],
2708
  "shape": [
2709
  768,
2710
+ 1152
2711
  ],
2712
  "filename_index": 0
2713
  },
2714
  {
2715
  "offsets": [
2716
  0,
2717
+ 1152
2718
  ],
2719
  "shape": [
2720
  768,
2721
+ 1152
2722
  ],
2723
  "filename_index": 1
2724
  }
2725
  ]
2726
  },
2727
+ "h.8.mlp.c_fc.bias": {
2728
  "type": "Distributed",
2729
  "shape": [
2730
+ 3072
 
 
 
2731
  ],
2732
  "dtype": "F32",
2733
  "chunks": [
2734
  {
2735
  "offsets": [
 
 
 
2736
  0
2737
  ],
2738
  "shape": [
2739
+ 1536
 
 
 
2740
  ],
2741
  "filename_index": 0
2742
  },
2743
  {
2744
  "offsets": [
2745
+ 1536
 
 
 
2746
  ],
2747
  "shape": [
2748
+ 1536
 
 
 
2749
  ],
2750
  "filename_index": 1
2751
  }
2752
  ]
2753
  },
2754
+ "h.10.ln_2.bias": {
2755
  "type": "Distributed",
2756
  "shape": [
2757
  768
 
2778
  }
2779
  ]
2780
  },
2781
+ "h.7.ln_1.weight": {
2782
  "type": "Distributed",
2783
  "shape": [
2784
+ 768
 
 
 
2785
  ],
2786
  "dtype": "F32",
2787
  "chunks": [
2788
  {
2789
  "offsets": [
 
 
 
2790
  0
2791
  ],
2792
  "shape": [
2793
+ 384
 
 
 
2794
  ],
2795
  "filename_index": 0
2796
  },
2797
  {
2798
  "offsets": [
2799
+ 384
 
 
 
2800
  ],
2801
  "shape": [
2802
+ 384
 
 
 
2803
  ],
2804
  "filename_index": 1
2805
  }
2806
  ]
2807
  },
2808
+ "h.10.mlp.c_proj.weight": {
2809
  "type": "Distributed",
2810
  "shape": [
2811
+ 3072,
2812
+ 768
2813
  ],
2814
  "dtype": "F32",
2815
  "chunks": [
 
2819
  0
2820
  ],
2821
  "shape": [
2822
+ 1536,
2823
+ 768
2824
  ],
2825
  "filename_index": 0
2826
  },
2827
  {
2828
  "offsets": [
2829
+ 1536,
2830
+ 0
2831
  ],
2832
  "shape": [
2833
+ 1536,
2834
+ 768
2835
  ],
2836
  "filename_index": 1
2837
  }
2838
  ]
2839
  },
2840
+ "h.8.attn.c_attn.weight": {
2841
  "type": "Distributed",
2842
  "shape": [
2843
+ 768,
2844
+ 2304
2845
  ],
2846
  "dtype": "F32",
2847
  "chunks": [
2848
  {
2849
  "offsets": [
2850
+ 0,
2851
  0
2852
  ],
2853
  "shape": [
2854
+ 768,
2855
+ 1152
2856
  ],
2857
  "filename_index": 0
2858
  },
2859
  {
2860
  "offsets": [
2861
+ 0,
2862
+ 1152
2863
  ],
2864
  "shape": [
2865
+ 768,
2866
+ 1152
2867
  ],
2868
  "filename_index": 1
2869
  }
2870
  ]
2871
  },
2872
+ "h.9.attn.c_attn.bias": {
2873
  "type": "Distributed",
2874
  "shape": [
2875
+ 2304
2876
  ],
2877
  "dtype": "F32",
2878
  "chunks": [
 
2881
  0
2882
  ],
2883
  "shape": [
2884
+ 1152
2885
  ],
2886
  "filename_index": 0
2887
  },
2888
  {
2889
  "offsets": [
2890
+ 1152
2891
  ],
2892
  "shape": [
2893
+ 1152
2894
  ],
2895
  "filename_index": 1
2896
  }
2897
  ]
2898
  },
2899
+ "h.1.attn.c_attn.weight": {
2900
  "type": "Distributed",
2901
  "shape": [
2902
+ 768,
2903
+ 2304
2904
  ],
2905
  "dtype": "F32",
2906
  "chunks": [
2907
  {
2908
  "offsets": [
2909
+ 0,
2910
  0
2911
  ],
2912
  "shape": [
2913
+ 768,
2914
+ 1152
2915
  ],
2916
  "filename_index": 0
2917
  },
2918
  {
2919
  "offsets": [
2920
+ 0,
2921
+ 1152
2922
  ],
2923
  "shape": [
2924
+ 768,
2925
+ 1152
2926
  ],
2927
  "filename_index": 1
2928
  }
2929
  ]
2930
  },
2931
+ "h.1.attn.bias": {
2932
  "type": "Distributed",
2933
  "shape": [
2934
  1,
 
2970
  }
2971
  ]
2972
  },
2973
+ "h.6.mlp.c_proj.weight": {
2974
  "type": "Distributed",
2975
  "shape": [
2976
+ 3072,
2977
+ 768
2978
  ],
2979
  "dtype": "F32",
2980
  "chunks": [
 
2984
  0
2985
  ],
2986
  "shape": [
2987
+ 1536,
2988
+ 768
2989
  ],
2990
  "filename_index": 0
2991
  },
2992
  {
2993
  "offsets": [
2994
+ 1536,
2995
+ 0
2996
  ],
2997
  "shape": [
2998
+ 1536,
2999
+ 768
3000
  ],
3001
  "filename_index": 1
3002
  }
3003
  ]
3004
  },
3005
+ "h.5.ln_2.weight": {
3006
  "type": "Distributed",
3007
  "shape": [
3008
  768
 
3029
  }
3030
  ]
3031
  },
3032
+ "h.7.mlp.c_proj.bias": {
3033
  "type": "Distributed",
3034
  "shape": [
3035
+ 768
 
 
 
3036
  ],
3037
  "dtype": "F32",
3038
  "chunks": [
3039
  {
3040
  "offsets": [
 
 
 
3041
  0
3042
  ],
3043
  "shape": [
3044
+ 384
 
 
 
3045
  ],
3046
  "filename_index": 0
3047
  },
3048
  {
3049
  "offsets": [
3050
+ 384
 
 
 
3051
  ],
3052
  "shape": [
3053
+ 384
 
 
 
3054
  ],
3055
  "filename_index": 1
3056
  }
3057
  ]
3058
  },
3059
+ "h.0.attn.c_proj.weight": {
3060
  "type": "Distributed",
3061
  "shape": [
3062
  768,
 
3088
  }
3089
  ]
3090
  },
3091
+ "h.3.attn.bias": {
3092
  "type": "Distributed",
3093
  "shape": [
3094
+ 1,
3095
+ 1,
3096
+ 1024,
3097
+ 1024
3098
  ],
3099
  "dtype": "F32",
3100
  "chunks": [
3101
  {
3102
  "offsets": [
3103
+ 0,
3104
+ 0,
3105
+ 0,
3106
  0
3107
  ],
3108
  "shape": [
3109
+ 1,
3110
+ 1,
3111
+ 1024,
3112
+ 512
3113
  ],
3114
  "filename_index": 0
3115
  },
3116
  {
3117
  "offsets": [
3118
+ 0,
3119
+ 0,
3120
+ 0,
3121
+ 512
3122
  ],
3123
  "shape": [
3124
+ 1,
3125
+ 1,
3126
+ 1024,
3127
+ 512
3128
  ],
3129
  "filename_index": 1
3130
  }
3131
  ]
3132
  },
3133
+ "h.2.mlp.c_proj.weight": {
3134
  "type": "Distributed",
3135
  "shape": [
3136
+ 3072,
3137
  768
3138
  ],
3139
  "dtype": "F32",
3140
  "chunks": [
3141
  {
3142
  "offsets": [
3143
+ 0,
3144
  0
3145
  ],
3146
  "shape": [
3147
+ 1536,
3148
+ 768
3149
  ],
3150
  "filename_index": 0
3151
  },
3152
  {
3153
  "offsets": [
3154
+ 1536,
3155
+ 0
3156
  ],
3157
  "shape": [
3158
+ 1536,
3159
+ 768
3160
  ],
3161
  "filename_index": 1
3162
  }
3163
  ]
3164
  },
3165
+ "h.5.ln_1.weight": {
3166
  "type": "Distributed",
3167
  "shape": [
3168
  768
 
3189
  }
3190
  ]
3191
  },
3192
+ "h.6.attn.c_proj.weight": {
3193
  "type": "Distributed",
3194
  "shape": [
3195
+ 768,
3196
  768
3197
  ],
3198
  "dtype": "F32",
3199
  "chunks": [
3200
  {
3201
  "offsets": [
3202
+ 0,
3203
  0
3204
  ],
3205
  "shape": [
3206
+ 384,
3207
+ 768
3208
  ],
3209
  "filename_index": 0
3210
  },
3211
  {
3212
  "offsets": [
3213
+ 384,
3214
+ 0
3215
  ],
3216
  "shape": [
3217
+ 384,
3218
+ 768
3219
  ],
3220
  "filename_index": 1
3221
  }
3222
  ]
3223
  },
3224
+ "wpe.weight": {
3225
  "type": "Distributed",
3226
  "shape": [
3227
+ 1024,
3228
  768
3229
  ],
3230
  "dtype": "F32",
3231
  "chunks": [
3232
  {
3233
  "offsets": [
3234
+ 0,
3235
  0
3236
  ],
3237
  "shape": [
3238
+ 1024,
3239
  384
3240
  ],
3241
  "filename_index": 0
3242
  },
3243
  {
3244
  "offsets": [
3245
+ 0,
3246
  384
3247
  ],
3248
  "shape": [
3249
+ 1024,
3250
  384
3251
  ],
3252
  "filename_index": 1
3253
  }
3254
  ]
3255
  },
3256
+ "h.11.ln_2.weight": {
3257
  "type": "Distributed",
3258
  "shape": [
3259
+ 768
3260
  ],
3261
  "dtype": "F32",
3262
  "chunks": [
 
3265
  0
3266
  ],
3267
  "shape": [
3268
+ 384
3269
  ],
3270
  "filename_index": 0
3271
  },
3272
  {
3273
  "offsets": [
3274
+ 384
3275
  ],
3276
  "shape": [
3277
+ 384
3278
  ],
3279
  "filename_index": 1
3280
  }
3281
  ]
3282
  },
3283
+ "h.1.attn.c_attn.bias": {
3284
  "type": "Distributed",
3285
  "shape": [
3286
+ 2304
3287
  ],
3288
  "dtype": "F32",
3289
  "chunks": [
 
3292
  0
3293
  ],
3294
  "shape": [
3295
+ 1152
3296
  ],
3297
  "filename_index": 0
3298
  },
3299
  {
3300
  "offsets": [
3301
+ 1152
3302
  ],
3303
  "shape": [
3304
+ 1152
3305
  ],
3306
  "filename_index": 1
3307
  }
3308
  ]
3309
  },
3310
+ "h.10.attn.c_proj.weight": {
3311
  "type": "Distributed",
3312
  "shape": [
3313
+ 768,
3314
+ 768
3315
  ],
3316
  "dtype": "F32",
3317
  "chunks": [
3318
  {
3319
  "offsets": [
3320
+ 0,
3321
  0
3322
  ],
3323
  "shape": [
3324
+ 384,
3325
+ 768
3326
  ],
3327
  "filename_index": 0
3328
  },
3329
  {
3330
  "offsets": [
3331
+ 384,
3332
+ 0
3333
  ],
3334
  "shape": [
3335
+ 384,
3336
+ 768
3337
  ],
3338
  "filename_index": 1
3339
  }
3340
  ]
3341
  },
3342
+ "h.0.attn.bias": {
3343
  "type": "Distributed",
3344
  "shape": [
3345
+ 1,
3346
+ 1,
3347
+ 1024,
3348
+ 1024
3349
  ],
3350
  "dtype": "F32",
3351
  "chunks": [
3352
  {
3353
  "offsets": [
3354
+ 0,
3355
+ 0,
3356
+ 0,
3357
  0
3358
  ],
3359
  "shape": [
3360
+ 1,
3361
+ 1,
3362
+ 1024,
3363
+ 512
3364
  ],
3365
  "filename_index": 0
3366
  },
3367
  {
3368
  "offsets": [
3369
+ 0,
3370
+ 0,
3371
+ 0,
3372
+ 512
3373
  ],
3374
  "shape": [
3375
+ 1,
3376
+ 1,
3377
+ 1024,
3378
+ 512
3379
  ],
3380
  "filename_index": 1
3381
  }
3382
  ]
3383
  },
3384
+ "h.4.mlp.c_fc.weight": {
3385
  "type": "Distributed",
3386
  "shape": [
3387
  768,
3388
+ 3072
3389
  ],
3390
  "dtype": "F32",
3391
  "chunks": [
 
3396
  ],
3397
  "shape": [
3398
  768,
3399
+ 1536
3400
  ],
3401
  "filename_index": 0
3402
  },
3403
  {
3404
  "offsets": [
3405
  0,
3406
+ 1536
3407
  ],
3408
  "shape": [
3409
  768,
3410
+ 1536
3411
  ],
3412
  "filename_index": 1
3413
  }
3414
  ]
3415
  },
3416
+ "h.9.ln_1.bias": {
3417
  "type": "Distributed",
3418
  "shape": [
3419
+ 768
 
 
 
3420
  ],
3421
  "dtype": "F32",
3422
  "chunks": [
3423
  {
3424
  "offsets": [
 
 
 
3425
  0
3426
  ],
3427
  "shape": [
3428
+ 384
 
 
 
3429
  ],
3430
  "filename_index": 0
3431
  },
3432
  {
3433
  "offsets": [
3434
+ 384
3435
+ ],
3436
+ "shape": [
3437
+ 384
3438
+ ],
3439
+ "filename_index": 1
3440
+ }
3441
+ ]
3442
+ },
3443
+ "h.0.mlp.c_fc.bias": {
3444
+ "type": "Distributed",
3445
+ "shape": [
3446
+ 3072
3447
+ ],
3448
+ "dtype": "F32",
3449
+ "chunks": [
3450
+ {
3451
+ "offsets": [
3452
+ 0
3453
+ ],
3454
+ "shape": [
3455
+ 1536
3456
+ ],
3457
+ "filename_index": 0
3458
+ },
3459
+ {
3460
+ "offsets": [
3461
+ 1536
3462
  ],
3463
  "shape": [
3464
+ 1536
 
 
 
3465
  ],
3466
  "filename_index": 1
3467
  }
3468
  ]
3469
  },
3470
+ "h.11.mlp.c_proj.bias": {
3471
  "type": "Distributed",
3472
  "shape": [
3473
  768
 
3494
  }
3495
  ]
3496
  },
3497
+ "h.8.attn.c_proj.weight": {
3498
  "type": "Distributed",
3499
  "shape": [
3500
+ 768,
3501
+ 768
3502
  ],
3503
  "dtype": "F32",
3504
  "chunks": [
3505
  {
3506
  "offsets": [
3507
+ 0,
3508
  0
3509
  ],
3510
  "shape": [
3511
+ 384,
3512
+ 768
3513
  ],
3514
  "filename_index": 0
3515
  },
3516
  {
3517
  "offsets": [
3518
+ 384,
3519
+ 0
3520
  ],
3521
  "shape": [
3522
+ 384,
3523
+ 768
3524
  ],
3525
  "filename_index": 1
3526
  }
3527
  ]
3528
  },
3529
+ "h.3.attn.c_proj.weight": {
3530
  "type": "Distributed",
3531
  "shape": [
3532
+ 768,
3533
  768
3534
  ],
3535
  "dtype": "F32",
 
3540
  0
3541
  ],
3542
  "shape": [
3543
+ 384,
3544
  768
3545
  ],
3546
  "filename_index": 0
3547
  },
3548
  {
3549
  "offsets": [
3550
+ 384,
3551
  0
3552
  ],
3553
  "shape": [
3554
+ 384,
3555
  768
3556
  ],
3557
  "filename_index": 1
3558
  }
3559
  ]
3560
  },
3561
+ "h.11.attn.c_attn.weight": {
3562
  "type": "Distributed",
3563
  "shape": [
3564
+ 768,
3565
+ 2304
3566
  ],
3567
  "dtype": "F32",
3568
  "chunks": [
3569
  {
3570
  "offsets": [
3571
+ 0,
3572
  0
3573
  ],
3574
  "shape": [
3575
+ 768,
3576
+ 1152
3577
  ],
3578
  "filename_index": 0
3579
  },
3580
  {
3581
  "offsets": [
3582
+ 0,
3583
+ 1152
3584
  ],
3585
  "shape": [
3586
+ 768,
3587
+ 1152
3588
  ],
3589
  "filename_index": 1
3590
  }
3591
  ]
3592
  },
3593
+ "h.9.attn.c_attn.weight": {
3594
  "type": "Distributed",
3595
  "shape": [
3596
+ 768,
3597
+ 2304
 
 
3598
  ],
3599
  "dtype": "F32",
3600
  "chunks": [
3601
  {
3602
  "offsets": [
 
 
3603
  0,
3604
  0
3605
  ],
3606
  "shape": [
3607
+ 768,
3608
+ 1152
 
 
3609
  ],
3610
  "filename_index": 0
3611
  },
3612
  {
3613
  "offsets": [
3614
  0,
3615
+ 1152
 
 
3616
  ],
3617
  "shape": [
3618
+ 768,
3619
+ 1152
 
 
3620
  ],
3621
  "filename_index": 1
3622
  }
3623
  ]
3624
  },
3625
+ "h.4.mlp.c_fc.bias": {
3626
  "type": "Distributed",
3627
  "shape": [
3628
+ 3072
 
 
 
3629
  ],
3630
  "dtype": "F32",
3631
  "chunks": [
3632
  {
3633
  "offsets": [
 
 
 
3634
  0
3635
  ],
3636
  "shape": [
3637
+ 1536
 
 
 
3638
  ],
3639
  "filename_index": 0
3640
  },
3641
  {
3642
  "offsets": [
3643
+ 1536
 
 
 
3644
  ],
3645
  "shape": [
3646
+ 1536
 
 
 
3647
  ],
3648
  "filename_index": 1
3649
  }
3650
  ]
3651
  },
3652
+ "h.3.ln_2.weight": {
3653
  "type": "Distributed",
3654
  "shape": [
 
3655
  768
3656
  ],
3657
  "dtype": "F32",
3658
  "chunks": [
3659
  {
3660
  "offsets": [
 
3661
  0
3662
  ],
3663
  "shape": [
3664
+ 384
 
3665
  ],
3666
  "filename_index": 0
3667
  },
3668
  {
3669
  "offsets": [
3670
+ 384
 
3671
  ],
3672
  "shape": [
3673
+ 384
 
3674
  ],
3675
  "filename_index": 1
3676
  }
3677
  ]
3678
  },
3679
+ "h.10.ln_2.weight": {
3680
  "type": "Distributed",
3681
  "shape": [
3682
  768
 
3703
  }
3704
  ]
3705
  },
3706
+ "h.9.mlp.c_fc.weight": {
3707
  "type": "Distributed",
3708
  "shape": [
3709
+ 768,
3710
+ 3072
3711
  ],
3712
  "dtype": "F32",
3713
  "chunks": [
3714
  {
3715
  "offsets": [
3716
+ 0,
3717
  0
3718
  ],
3719
  "shape": [
3720
+ 768,
3721
+ 1536
3722
  ],
3723
  "filename_index": 0
3724
  },
3725
  {
3726
  "offsets": [
3727
+ 0,
3728
+ 1536
3729
  ],
3730
  "shape": [
3731
+ 768,
3732
+ 1536
3733
  ],
3734
  "filename_index": 1
3735
  }
3736
  ]
3737
  },
3738
+ "h.4.ln_1.weight": {
3739
  "type": "Distributed",
3740
  "shape": [
3741
  768
 
3762
  }
3763
  ]
3764
  },
3765
+ "h.1.mlp.c_fc.bias": {
3766
  "type": "Distributed",
3767
  "shape": [
3768
+ 3072
 
3769
  ],
3770
  "dtype": "F32",
3771
  "chunks": [
3772
  {
3773
  "offsets": [
 
3774
  0
3775
  ],
3776
  "shape": [
3777
+ 1536
 
3778
  ],
3779
  "filename_index": 0
3780
  },
3781
  {
3782
  "offsets": [
3783
+ 1536
 
3784
  ],
3785
  "shape": [
3786
+ 1536
 
3787
  ],
3788
  "filename_index": 1
3789
  }
3790
  ]
3791
  },
3792
+ "h.9.mlp.c_proj.bias": {
3793
  "type": "Distributed",
3794
  "shape": [
3795
+ 768
 
3796
  ],
3797
  "dtype": "F32",
3798
  "chunks": [
3799
  {
3800
  "offsets": [
 
3801
  0
3802
  ],
3803
  "shape": [
3804
+ 384
 
3805
  ],
3806
  "filename_index": 0
3807
  },
3808
  {
3809
  "offsets": [
3810
+ 384
 
3811
  ],
3812
  "shape": [
3813
+ 384
 
3814
  ],
3815
  "filename_index": 1
3816
  }
3817
  ]
3818
  },
3819
+ "h.10.attn.c_proj.bias": {
3820
  "type": "Distributed",
3821
  "shape": [
3822
  768
 
3843
  }
3844
  ]
3845
  },
3846
+ "h.6.ln_2.bias": {
3847
  "type": "Distributed",
3848
  "shape": [
3849
  768
 
3870
  }
3871
  ]
3872
  },
3873
+ "h.6.attn.bias": {
3874
  "type": "Distributed",
3875
  "shape": [
3876
+ 1,
3877
+ 1,
3878
+ 1024,
3879
+ 1024
3880
  ],
3881
  "dtype": "F32",
3882
  "chunks": [
3883
  {
3884
  "offsets": [
3885
+ 0,
3886
+ 0,
3887
+ 0,
3888
  0
3889
  ],
3890
  "shape": [
3891
+ 1,
3892
+ 1,
3893
+ 1024,
3894
+ 512
3895
  ],
3896
  "filename_index": 0
3897
  },
3898
  {
3899
  "offsets": [
3900
+ 0,
3901
+ 0,
3902
+ 0,
3903
+ 512
3904
  ],
3905
  "shape": [
3906
+ 1,
3907
+ 1,
3908
+ 1024,
3909
+ 512
3910
  ],
3911
  "filename_index": 1
3912
  }
3913
  ]
3914
  },
3915
+ "h.8.ln_1.weight": {
3916
  "type": "Distributed",
3917
  "shape": [
3918
  768
 
3939
  }
3940
  ]
3941
  },
3942
+ "h.0.mlp.c_proj.bias": {
3943
  "type": "Distributed",
3944
  "shape": [
3945
  768
 
3966
  }
3967
  ]
3968
  },
3969
+ "h.2.attn.c_proj.bias": {
3970
  "type": "Distributed",
3971
  "shape": [
3972
  768
 
3993
  }
3994
  ]
3995
  },
3996
+ "h.0.ln_2.bias": {
3997
  "type": "Distributed",
3998
  "shape": [
 
3999
  768
4000
  ],
4001
  "dtype": "F32",
4002
  "chunks": [
4003
  {
4004
  "offsets": [
 
4005
  0
4006
  ],
4007
  "shape": [
4008
+ 384
 
4009
  ],
4010
  "filename_index": 0
4011
  },
4012
  {
4013
  "offsets": [
4014
+ 384
 
4015
  ],
4016
  "shape": [
4017
+ 384
 
4018
  ],
4019
  "filename_index": 1
4020
  }
4021
  ]
4022
  },
4023
+ "h.8.mlp.c_proj.bias": {
4024
  "type": "Distributed",
4025
  "shape": [
4026
  768
 
4047
  }
4048
  ]
4049
  },
4050
+ "h.9.mlp.c_fc.bias": {
4051
  "type": "Distributed",
4052
  "shape": [
4053
+ 3072
 
4054
  ],
4055
  "dtype": "F32",
4056
  "chunks": [
4057
  {
4058
  "offsets": [
 
4059
  0
4060
  ],
4061
  "shape": [
4062
+ 1536
 
4063
  ],
4064
  "filename_index": 0
4065
  },
4066
  {
4067
  "offsets": [
4068
+ 1536
4069
+ ],
4070
+ "shape": [
4071
+ 1536
4072
+ ],
4073
+ "filename_index": 1
4074
+ }
4075
+ ]
4076
+ },
4077
+ "h.3.attn.c_attn.bias": {
4078
+ "type": "Distributed",
4079
+ "shape": [
4080
+ 2304
4081
+ ],
4082
+ "dtype": "F32",
4083
+ "chunks": [
4084
+ {
4085
+ "offsets": [
4086
  0
4087
  ],
4088
  "shape": [
4089
+ 1152
4090
+ ],
4091
+ "filename_index": 0
4092
+ },
4093
+ {
4094
+ "offsets": [
4095
+ 1152
4096
+ ],
4097
+ "shape": [
4098
+ 1152
4099
  ],
4100
  "filename_index": 1
4101
  }
4102
  ]
4103
  },
4104
+ "h.3.attn.c_attn.weight": {
4105
  "type": "Distributed",
4106
  "shape": [
4107
+ 768,
4108
+ 2304
 
 
4109
  ],
4110
  "dtype": "F32",
4111
  "chunks": [
4112
  {
4113
  "offsets": [
 
 
4114
  0,
4115
  0
4116
  ],
4117
  "shape": [
4118
+ 768,
4119
+ 1152
 
 
4120
  ],
4121
  "filename_index": 0
4122
  },
4123
  {
4124
  "offsets": [
4125
  0,
4126
+ 1152
 
 
4127
  ],
4128
  "shape": [
4129
+ 768,
4130
+ 1152
 
 
4131
  ],
4132
  "filename_index": 1
4133
  }
4134
  ]
4135
  },
4136
+ "h.5.ln_2.bias": {
4137
  "type": "Distributed",
4138
  "shape": [
4139
  768
 
4160
  }
4161
  ]
4162
  },
4163
+ "h.5.attn.c_proj.bias": {
4164
  "type": "Distributed",
4165
  "shape": [
4166
  768
 
4187
  }
4188
  ]
4189
  },
4190
+ "h.3.ln_1.bias": {
4191
  "type": "Distributed",
4192
  "shape": [
4193
+ 768
 
4194
  ],
4195
  "dtype": "F32",
4196
  "chunks": [
4197
  {
4198
  "offsets": [
 
4199
  0
4200
  ],
4201
  "shape": [
4202
+ 384
 
4203
  ],
4204
  "filename_index": 0
4205
  },
4206
  {
4207
  "offsets": [
4208
+ 384
 
4209
  ],
4210
  "shape": [
4211
+ 384
 
4212
  ],
4213
  "filename_index": 1
4214
  }
4215
  ]
4216
  },
4217
+ "h.2.mlp.c_proj.bias": {
4218
  "type": "Distributed",
4219
  "shape": [
4220
+ 768
4221
  ],
4222
  "dtype": "F32",
4223
  "chunks": [
 
4226
  0
4227
  ],
4228
  "shape": [
4229
+ 384
4230
  ],
4231
  "filename_index": 0
4232
  },
4233
  {
4234
  "offsets": [
4235
+ 384
4236
  ],
4237
  "shape": [
4238
+ 384
4239
  ],
4240
  "filename_index": 1
4241
  }
4242
  ]
4243
  },
4244
+ "h.4.attn.c_proj.bias": {
4245
  "type": "Distributed",
4246
  "shape": [
4247
  768
 
4268
  }
4269
  ]
4270
  },
4271
+ "h.1.ln_1.weight": {
4272
  "type": "Distributed",
4273
  "shape": [
4274
+ 768
 
4275
  ],
4276
  "dtype": "F32",
4277
  "chunks": [
4278
  {
4279
  "offsets": [
 
4280
  0
4281
  ],
4282
  "shape": [
4283
+ 384
 
4284
  ],
4285
  "filename_index": 0
4286
  },
4287
  {
4288
  "offsets": [
4289
+ 384
 
4290
  ],
4291
  "shape": [
4292
+ 384
 
4293
  ],
4294
  "filename_index": 1
4295
  }
4296
  ]
4297
  },
4298
+ "h.8.ln_2.bias": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
  768
 
4322
  }
4323
  ]
4324
  },
4325
+ "h.6.mlp.c_fc.weight": {
4326
  "type": "Distributed",
4327
  "shape": [
4328
+ 768,
4329
+ 3072
4330
  ],
4331
  "dtype": "F32",
4332
  "chunks": [
4333
  {
4334
  "offsets": [
4335
+ 0,
4336
  0
4337
  ],
4338
  "shape": [
4339
+ 768,
4340
+ 1536
4341
  ],
4342
  "filename_index": 0
4343
  },
4344
  {
4345
  "offsets": [
4346
+ 0,
4347
+ 1536
4348
  ],
4349
  "shape": [
4350
+ 768,
4351
+ 1536
4352
  ],
4353
  "filename_index": 1
4354
  }
4355
  ]
4356
  },
4357
+ "h.2.ln_2.weight": {
4358
  "type": "Distributed",
4359
  "shape": [
4360
+ 768
4361
  ],
4362
  "dtype": "F32",
4363
  "chunks": [
 
4366
  0
4367
  ],
4368
  "shape": [
4369
+ 384
4370
  ],
4371
  "filename_index": 0
4372
  },
4373
  {
4374
  "offsets": [
4375
+ 384
4376
  ],
4377
  "shape": [
4378
+ 384
4379
  ],
4380
  "filename_index": 1
4381
  }
4382
  ]
4383
  },
4384
+ "h.10.mlp.c_fc.weight": {
4385
  "type": "Distributed",
4386
  "shape": [
4387
+ 768,
4388
+ 3072
4389
  ],
4390
  "dtype": "F32",
4391
  "chunks": [
 
4395
  0
4396
  ],
4397
  "shape": [
4398
+ 768,
4399
+ 1536
4400
  ],
4401
  "filename_index": 0
4402
  },
4403
  {
4404
  "offsets": [
4405
+ 0,
4406
+ 1536
4407
  ],
4408
  "shape": [
4409
+ 768,
4410
+ 1536
4411
  ],
4412
  "filename_index": 1
4413
  }
4414
  ]
4415
  },
4416
+ "h.11.mlp.c_proj.weight": {
4417
  "type": "Distributed",
4418
  "shape": [
4419
+ 3072,
4420
+ 768
4421
  ],
4422
  "dtype": "F32",
4423
  "chunks": [
 
4427
  0
4428
  ],
4429
  "shape": [
4430
+ 1536,
4431
+ 768
4432
  ],
4433
  "filename_index": 0
4434
  },
4435
  {
4436
  "offsets": [
4437
+ 1536,
4438
+ 0
4439
  ],
4440
  "shape": [
4441
+ 1536,
4442
+ 768
4443
  ],
4444
  "filename_index": 1
4445
  }
4446
  ]
4447
  },
4448
+ "h.5.attn.c_attn.weight": {
4449
  "type": "Distributed",
4450
  "shape": [
4451
  768,
 
4477
  }
4478
  ]
4479
  },
4480
+ "h.8.ln_2.weight": {
4481
  "type": "Distributed",
4482
  "shape": [
4483
  768
 
4504
  }
4505
  ]
4506
  },
4507
+ "h.8.mlp.c_proj.weight": {
4508
  "type": "Distributed",
4509
  "shape": [
4510
+ 3072,
4511
  768
4512
  ],
4513
  "dtype": "F32",
 
4518
  0
4519
  ],
4520
  "shape": [
4521
+ 1536,
4522
  768
4523
  ],
4524
  "filename_index": 0
4525
  },
4526
  {
4527
  "offsets": [
4528
+ 1536,
4529
  0
4530
  ],
4531
  "shape": [
4532
+ 1536,
4533
  768
4534
  ],
4535
  "filename_index": 1
4536
  }
4537
  ]
4538
  },
4539
+ "h.5.attn.c_attn.bias": {
4540
  "type": "Distributed",
4541
  "shape": [
4542
+ 2304
4543
  ],
4544
  "dtype": "F32",
4545
  "chunks": [
 
4548
  0
4549
  ],
4550
  "shape": [
4551
+ 1152
4552
  ],
4553
  "filename_index": 0
4554
  },
4555
  {
4556
  "offsets": [
4557
+ 1152
4558
  ],
4559
  "shape": [
4560
+ 1152
4561
  ],
4562
  "filename_index": 1
4563
  }
4564
  ]
4565
  },
4566
+ "h.3.ln_2.bias": {
4567
  "type": "Distributed",
4568
  "shape": [
4569
+ 768
 
4570
  ],
4571
  "dtype": "F32",
4572
  "chunks": [
4573
  {
4574
  "offsets": [
 
4575
  0
4576
  ],
4577
  "shape": [
4578
+ 384
 
4579
  ],
4580
  "filename_index": 0
4581
  },
4582
  {
4583
  "offsets": [
4584
+ 384
 
4585
  ],
4586
  "shape": [
4587
+ 384
 
4588
  ],
4589
  "filename_index": 1
4590
  }
4591
  ]
4592
  },
4593
+ "h.11.mlp.c_fc.weight": {
4594
  "type": "Distributed",
4595
  "shape": [
4596
+ 768,
4597
+ 3072
 
 
4598
  ],
4599
  "dtype": "F32",
4600
  "chunks": [
4601
  {
4602
  "offsets": [
 
 
4603
  0,
4604
  0
4605
  ],
4606
  "shape": [
4607
+ 768,
4608
+ 1536
 
 
4609
  ],
4610
  "filename_index": 0
4611
  },
4612
  {
4613
  "offsets": [
4614
  0,
4615
+ 1536
 
 
4616
  ],
4617
  "shape": [
4618
+ 768,
4619
+ 1536
 
 
4620
  ],
4621
  "filename_index": 1
4622
  }
4623
  ]
4624
  },
4625
+ "h.10.ln_1.weight": {
4626
  "type": "Distributed",
4627
  "shape": [
 
4628
  768
4629
  ],
4630
  "dtype": "F32",
4631
  "chunks": [
4632
  {
4633
  "offsets": [
 
4634
  0
4635
  ],
4636
  "shape": [
4637
+ 384
 
4638
  ],
4639
  "filename_index": 0
4640
  },
4641
  {
4642
  "offsets": [
4643
+ 384
 
4644
  ],
4645
  "shape": [
4646
+ 384
 
4647
  ],
4648
  "filename_index": 1
4649
  }
4650
  ]
4651
  },
4652
+ "h.10.attn.bias": {
4653
  "type": "Distributed",
4654
  "shape": [
4655
+ 1,
4656
+ 1,
4657
+ 1024,
4658
+ 1024
4659
  ],
4660
  "dtype": "F32",
4661
  "chunks": [
4662
  {
4663
  "offsets": [
4664
+ 0,
4665
+ 0,
4666
+ 0,
4667
  0
4668
  ],
4669
  "shape": [
4670
+ 1,
4671
+ 1,
4672
+ 1024,
4673
+ 512
4674
  ],
4675
  "filename_index": 0
4676
  },
4677
  {
4678
  "offsets": [
4679
+ 0,
4680
+ 0,
4681
+ 0,
4682
+ 512
4683
  ],
4684
  "shape": [
4685
+ 1,
4686
+ 1,
4687
+ 1024,
4688
+ 512
4689
  ],
4690
  "filename_index": 1
4691
  }
4692
  ]
4693
  },
4694
+ "h.1.mlp.c_proj.bias": {
4695
  "type": "Distributed",
4696
  "shape": [
4697
  768
 
4718
  }
4719
  ]
4720
  },
4721
+ "h.1.mlp.c_fc.weight": {
4722
  "type": "Distributed",
4723
  "shape": [
4724
+ 768,
4725
+ 3072
4726
  ],
4727
  "dtype": "F32",
4728
  "chunks": [
 
4732
  0
4733
  ],
4734
  "shape": [
4735
+ 768,
4736
+ 1536
4737
  ],
4738
  "filename_index": 0
4739
  },
4740
  {
4741
  "offsets": [
4742
+ 0,
4743
+ 1536
4744
  ],
4745
  "shape": [
4746
+ 768,
4747
+ 1536
4748
  ],
4749
  "filename_index": 1
4750
  }