Narsil HF Staff commited on
Commit
40773e8
·
verified ·
1 Parent(s): 492bdfd

Upload topology.json

Browse files
Files changed (1) hide show
  1. topology.json +915 -915
topology.json CHANGED
@@ -1,38 +1,33 @@
1
  {
2
  "tensors": {
3
- "h.5.attn.c_proj.weight": {
4
  "type": "Distributed",
5
  "shape": [
6
- 768,
7
  768
8
  ],
9
  "dtype": "F32",
10
  "chunks": [
11
  {
12
  "offsets": [
13
- 0,
14
  0
15
  ],
16
  "shape": [
17
- 384,
18
- 768
19
  ],
20
  "filename_index": 0
21
  },
22
  {
23
  "offsets": [
24
- 384,
25
- 0
26
  ],
27
  "shape": [
28
- 384,
29
- 768
30
  ],
31
  "filename_index": 1
32
  }
33
  ]
34
  },
35
- "h.7.ln_2.weight": {
36
  "type": "Distributed",
37
  "shape": [
38
  768
@@ -59,11 +54,11 @@
59
  }
60
  ]
61
  },
62
- "h.8.attn.c_attn.weight": {
63
  "type": "Distributed",
64
  "shape": [
65
  768,
66
- 2304
67
  ],
68
  "dtype": "F32",
69
  "chunks": [
@@ -74,54 +69,69 @@
74
  ],
75
  "shape": [
76
  768,
77
- 1152
78
  ],
79
  "filename_index": 0
80
  },
81
  {
82
  "offsets": [
83
  0,
84
- 1152
85
  ],
86
  "shape": [
87
  768,
88
- 1152
89
  ],
90
  "filename_index": 1
91
  }
92
  ]
93
  },
94
- "h.11.mlp.c_fc.bias": {
95
  "type": "Distributed",
96
  "shape": [
97
- 3072
 
 
 
98
  ],
99
  "dtype": "F32",
100
  "chunks": [
101
  {
102
  "offsets": [
 
 
 
103
  0
104
  ],
105
  "shape": [
106
- 1536
 
 
 
107
  ],
108
  "filename_index": 0
109
  },
110
  {
111
  "offsets": [
112
- 1536
 
 
 
113
  ],
114
  "shape": [
115
- 1536
 
 
 
116
  ],
117
  "filename_index": 1
118
  }
119
  ]
120
  },
121
- "h.11.ln_1.weight": {
122
  "type": "Distributed",
123
  "shape": [
124
- 768
125
  ],
126
  "dtype": "F32",
127
  "chunks": [
@@ -130,49 +140,54 @@
130
  0
131
  ],
132
  "shape": [
133
- 384
134
  ],
135
  "filename_index": 0
136
  },
137
  {
138
  "offsets": [
139
- 384
140
  ],
141
  "shape": [
142
- 384
143
  ],
144
  "filename_index": 1
145
  }
146
  ]
147
  },
148
- "h.9.mlp.c_fc.bias": {
149
  "type": "Distributed",
150
  "shape": [
 
151
  3072
152
  ],
153
  "dtype": "F32",
154
  "chunks": [
155
  {
156
  "offsets": [
 
157
  0
158
  ],
159
  "shape": [
 
160
  1536
161
  ],
162
  "filename_index": 0
163
  },
164
  {
165
  "offsets": [
 
166
  1536
167
  ],
168
  "shape": [
 
169
  1536
170
  ],
171
  "filename_index": 1
172
  }
173
  ]
174
  },
175
- "h.3.ln_2.weight": {
176
  "type": "Distributed",
177
  "shape": [
178
  768
@@ -199,7 +214,7 @@
199
  }
200
  ]
201
  },
202
- "h.6.mlp.c_proj.bias": {
203
  "type": "Distributed",
204
  "shape": [
205
  768
@@ -226,7 +241,7 @@
226
  }
227
  ]
228
  },
229
- "h.1.ln_1.weight": {
230
  "type": "Distributed",
231
  "shape": [
232
  768
@@ -253,53 +268,38 @@
253
  }
254
  ]
255
  },
256
- "h.1.attn.bias": {
257
  "type": "Distributed",
258
  "shape": [
259
- 1,
260
- 1,
261
- 1024,
262
- 1024
263
  ],
264
  "dtype": "F32",
265
  "chunks": [
266
  {
267
  "offsets": [
268
- 0,
269
- 0,
270
- 0,
271
  0
272
  ],
273
  "shape": [
274
- 1,
275
- 1,
276
- 1024,
277
- 512
278
  ],
279
  "filename_index": 0
280
  },
281
  {
282
  "offsets": [
283
- 0,
284
- 0,
285
- 0,
286
- 512
287
  ],
288
  "shape": [
289
- 1,
290
- 1,
291
- 1024,
292
- 512
293
  ],
294
  "filename_index": 1
295
  }
296
  ]
297
  },
298
- "h.1.mlp.c_fc.weight": {
299
  "type": "Distributed",
300
  "shape": [
301
  768,
302
- 3072
303
  ],
304
  "dtype": "F32",
305
  "chunks": [
@@ -310,24 +310,24 @@
310
  ],
311
  "shape": [
312
  768,
313
- 1536
314
  ],
315
  "filename_index": 0
316
  },
317
  {
318
  "offsets": [
319
  0,
320
- 1536
321
  ],
322
  "shape": [
323
  768,
324
- 1536
325
  ],
326
  "filename_index": 1
327
  }
328
  ]
329
  },
330
- "h.6.ln_1.weight": {
331
  "type": "Distributed",
332
  "shape": [
333
  768
@@ -354,7 +354,7 @@
354
  }
355
  ]
356
  },
357
- "ln_f.weight": {
358
  "type": "Distributed",
359
  "shape": [
360
  768
@@ -381,65 +381,75 @@
381
  }
382
  ]
383
  },
384
- "h.10.ln_2.bias": {
385
  "type": "Distributed",
386
  "shape": [
387
- 768
 
388
  ],
389
  "dtype": "F32",
390
  "chunks": [
391
  {
392
  "offsets": [
 
393
  0
394
  ],
395
  "shape": [
396
- 384
 
397
  ],
398
  "filename_index": 0
399
  },
400
  {
401
  "offsets": [
402
- 384
 
403
  ],
404
  "shape": [
405
- 384
 
406
  ],
407
  "filename_index": 1
408
  }
409
  ]
410
  },
411
- "h.5.attn.c_proj.bias": {
412
  "type": "Distributed",
413
  "shape": [
 
414
  768
415
  ],
416
  "dtype": "F32",
417
  "chunks": [
418
  {
419
  "offsets": [
 
420
  0
421
  ],
422
  "shape": [
423
- 384
 
424
  ],
425
  "filename_index": 0
426
  },
427
  {
428
  "offsets": [
429
- 384
 
430
  ],
431
  "shape": [
432
- 384
 
433
  ],
434
  "filename_index": 1
435
  }
436
  ]
437
  },
438
- "h.10.mlp.c_fc.weight": {
439
  "type": "Distributed",
440
  "shape": [
441
  768,
442
- 3072
443
  ],
444
  "dtype": "F32",
445
  "chunks": [
@@ -449,60 +459,55 @@
449
  0
450
  ],
451
  "shape": [
452
- 768,
453
- 1536
454
  ],
455
  "filename_index": 0
456
  },
457
  {
458
  "offsets": [
459
- 0,
460
- 1536
461
  ],
462
  "shape": [
463
- 768,
464
- 1536
465
  ],
466
  "filename_index": 1
467
  }
468
  ]
469
  },
470
- "h.2.mlp.c_fc.weight": {
471
  "type": "Distributed",
472
  "shape": [
473
- 768,
474
- 3072
475
  ],
476
  "dtype": "F32",
477
  "chunks": [
478
  {
479
  "offsets": [
480
- 0,
481
  0
482
  ],
483
  "shape": [
484
- 768,
485
- 1536
486
  ],
487
  "filename_index": 0
488
  },
489
  {
490
  "offsets": [
491
- 0,
492
- 1536
493
  ],
494
  "shape": [
495
- 768,
496
- 1536
497
  ],
498
  "filename_index": 1
499
  }
500
  ]
501
  },
502
- "h.4.attn.c_attn.bias": {
503
  "type": "Distributed",
504
  "shape": [
505
- 2304
506
  ],
507
  "dtype": "F32",
508
  "chunks": [
@@ -511,25 +516,25 @@
511
  0
512
  ],
513
  "shape": [
514
- 1152
515
  ],
516
  "filename_index": 0
517
  },
518
  {
519
  "offsets": [
520
- 1152
521
  ],
522
  "shape": [
523
- 1152
524
  ],
525
  "filename_index": 1
526
  }
527
  ]
528
  },
529
- "h.11.attn.c_attn.bias": {
530
  "type": "Distributed",
531
  "shape": [
532
- 2304
533
  ],
534
  "dtype": "F32",
535
  "chunks": [
@@ -538,130 +543,150 @@
538
  0
539
  ],
540
  "shape": [
541
- 1152
542
  ],
543
  "filename_index": 0
544
  },
545
  {
546
  "offsets": [
547
- 1152
548
  ],
549
  "shape": [
550
- 1152
551
  ],
552
  "filename_index": 1
553
  }
554
  ]
555
  },
556
- "h.11.ln_1.bias": {
557
  "type": "Distributed",
558
  "shape": [
559
- 768
 
560
  ],
561
  "dtype": "F32",
562
  "chunks": [
563
  {
564
  "offsets": [
 
565
  0
566
  ],
567
  "shape": [
568
- 384
 
569
  ],
570
  "filename_index": 0
571
  },
572
  {
573
  "offsets": [
574
- 384
 
575
  ],
576
  "shape": [
577
- 384
 
578
  ],
579
  "filename_index": 1
580
  }
581
  ]
582
  },
583
- "h.0.ln_1.weight": {
584
  "type": "Distributed",
585
  "shape": [
 
586
  768
587
  ],
588
  "dtype": "F32",
589
  "chunks": [
590
  {
591
  "offsets": [
 
592
  0
593
  ],
594
  "shape": [
595
- 384
 
596
  ],
597
  "filename_index": 0
598
  },
599
  {
600
  "offsets": [
601
- 384
 
602
  ],
603
  "shape": [
604
- 384
 
605
  ],
606
  "filename_index": 1
607
  }
608
  ]
609
  },
610
- "h.9.ln_2.weight": {
611
  "type": "Distributed",
612
  "shape": [
613
- 768
 
614
  ],
615
  "dtype": "F32",
616
  "chunks": [
617
  {
618
  "offsets": [
 
619
  0
620
  ],
621
  "shape": [
622
- 384
 
623
  ],
624
  "filename_index": 0
625
  },
626
  {
627
  "offsets": [
628
- 384
 
629
  ],
630
  "shape": [
631
- 384
 
632
  ],
633
  "filename_index": 1
634
  }
635
  ]
636
  },
637
- "h.9.attn.c_attn.bias": {
638
  "type": "Distributed",
639
  "shape": [
640
- 2304
 
641
  ],
642
  "dtype": "F32",
643
  "chunks": [
644
  {
645
  "offsets": [
 
646
  0
647
  ],
648
  "shape": [
649
- 1152
 
650
  ],
651
  "filename_index": 0
652
  },
653
  {
654
  "offsets": [
655
- 1152
 
656
  ],
657
  "shape": [
658
- 1152
 
659
  ],
660
  "filename_index": 1
661
  }
662
  ]
663
  },
664
- "h.2.attn.bias": {
665
  "type": "Distributed",
666
  "shape": [
667
  1,
@@ -703,37 +728,42 @@
703
  }
704
  ]
705
  },
706
- "h.0.mlp.c_fc.bias": {
707
  "type": "Distributed",
708
  "shape": [
 
709
  3072
710
  ],
711
  "dtype": "F32",
712
  "chunks": [
713
  {
714
  "offsets": [
 
715
  0
716
  ],
717
  "shape": [
 
718
  1536
719
  ],
720
  "filename_index": 0
721
  },
722
  {
723
  "offsets": [
 
724
  1536
725
  ],
726
  "shape": [
 
727
  1536
728
  ],
729
  "filename_index": 1
730
  }
731
  ]
732
  },
733
- "h.0.attn.c_attn.bias": {
734
  "type": "Distributed",
735
  "shape": [
736
- 2304
737
  ],
738
  "dtype": "F32",
739
  "chunks": [
@@ -742,26 +772,26 @@
742
  0
743
  ],
744
  "shape": [
745
- 1152
746
  ],
747
  "filename_index": 0
748
  },
749
  {
750
  "offsets": [
751
- 1152
752
  ],
753
  "shape": [
754
- 1152
755
  ],
756
  "filename_index": 1
757
  }
758
  ]
759
  },
760
- "h.2.mlp.c_proj.weight": {
761
  "type": "Distributed",
762
  "shape": [
763
- 3072,
764
- 768
765
  ],
766
  "dtype": "F32",
767
  "chunks": [
@@ -771,146 +801,146 @@
771
  0
772
  ],
773
  "shape": [
774
- 1536,
775
- 768
776
  ],
777
  "filename_index": 0
778
  },
779
  {
780
  "offsets": [
781
- 1536,
782
- 0
783
  ],
784
  "shape": [
785
- 1536,
786
- 768
787
  ],
788
  "filename_index": 1
789
  }
790
  ]
791
  },
792
- "h.1.attn.c_attn.bias": {
793
  "type": "Distributed",
794
  "shape": [
795
- 2304
 
796
  ],
797
  "dtype": "F32",
798
  "chunks": [
799
  {
800
  "offsets": [
 
801
  0
802
  ],
803
  "shape": [
804
- 1152
 
805
  ],
806
  "filename_index": 0
807
  },
808
  {
809
  "offsets": [
810
- 1152
 
811
  ],
812
  "shape": [
813
- 1152
 
814
  ],
815
  "filename_index": 1
816
  }
817
  ]
818
  },
819
- "h.3.mlp.c_proj.weight": {
820
  "type": "Distributed",
821
  "shape": [
822
- 3072,
823
- 768
824
  ],
825
  "dtype": "F32",
826
  "chunks": [
827
  {
828
  "offsets": [
829
- 0,
830
  0
831
  ],
832
  "shape": [
833
- 1536,
834
- 768
835
  ],
836
  "filename_index": 0
837
  },
838
  {
839
  "offsets": [
840
- 1536,
841
- 0
842
  ],
843
  "shape": [
844
- 1536,
845
- 768
846
  ],
847
  "filename_index": 1
848
  }
849
  ]
850
  },
851
- "h.5.attn.c_attn.bias": {
852
  "type": "Distributed",
853
  "shape": [
854
- 2304
 
855
  ],
856
  "dtype": "F32",
857
  "chunks": [
858
  {
859
  "offsets": [
 
860
  0
861
  ],
862
  "shape": [
863
- 1152
 
864
  ],
865
  "filename_index": 0
866
  },
867
  {
868
  "offsets": [
869
- 1152
 
870
  ],
871
  "shape": [
872
- 1152
 
873
  ],
874
  "filename_index": 1
875
  }
876
  ]
877
  },
878
- "h.0.attn.c_attn.weight": {
879
  "type": "Distributed",
880
  "shape": [
881
- 768,
882
- 2304
883
  ],
884
  "dtype": "F32",
885
  "chunks": [
886
  {
887
  "offsets": [
888
- 0,
889
  0
890
  ],
891
  "shape": [
892
- 768,
893
- 1152
894
  ],
895
  "filename_index": 0
896
  },
897
  {
898
  "offsets": [
899
- 0,
900
- 1152
901
  ],
902
  "shape": [
903
- 768,
904
- 1152
905
  ],
906
  "filename_index": 1
907
  }
908
  ]
909
  },
910
- "h.2.attn.c_proj.bias": {
911
  "type": "Distributed",
912
  "shape": [
913
- 768
914
  ],
915
  "dtype": "F32",
916
  "chunks": [
@@ -919,25 +949,25 @@
919
  0
920
  ],
921
  "shape": [
922
- 384
923
  ],
924
  "filename_index": 0
925
  },
926
  {
927
  "offsets": [
928
- 384
929
  ],
930
  "shape": [
931
- 384
932
  ],
933
  "filename_index": 1
934
  }
935
  ]
936
  },
937
- "h.7.attn.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
- 768
941
  ],
942
  "dtype": "F32",
943
  "chunks": [
@@ -946,22 +976,22 @@
946
  0
947
  ],
948
  "shape": [
949
- 384
950
  ],
951
  "filename_index": 0
952
  },
953
  {
954
  "offsets": [
955
- 384
956
  ],
957
  "shape": [
958
- 384
959
  ],
960
  "filename_index": 1
961
  }
962
  ]
963
  },
964
- "h.9.ln_1.bias": {
965
  "type": "Distributed",
966
  "shape": [
967
  768
@@ -988,93 +1018,98 @@
988
  }
989
  ]
990
  },
991
- "h.2.mlp.c_proj.bias": {
992
  "type": "Distributed",
993
  "shape": [
 
994
  768
995
  ],
996
  "dtype": "F32",
997
  "chunks": [
998
  {
999
  "offsets": [
 
1000
  0
1001
  ],
1002
  "shape": [
1003
- 384
 
1004
  ],
1005
  "filename_index": 0
1006
  },
1007
  {
1008
  "offsets": [
1009
- 384
 
1010
  ],
1011
  "shape": [
1012
- 384
 
1013
  ],
1014
  "filename_index": 1
1015
  }
1016
  ]
1017
  },
1018
- "h.10.ln_1.bias": {
1019
  "type": "Distributed",
1020
  "shape": [
 
1021
  768
1022
  ],
1023
  "dtype": "F32",
1024
  "chunks": [
1025
  {
1026
  "offsets": [
 
1027
  0
1028
  ],
1029
  "shape": [
1030
- 384
 
1031
  ],
1032
  "filename_index": 0
1033
  },
1034
  {
1035
  "offsets": [
1036
- 384
 
1037
  ],
1038
  "shape": [
1039
- 384
 
1040
  ],
1041
  "filename_index": 1
1042
  }
1043
  ]
1044
  },
1045
- "h.10.mlp.c_proj.weight": {
1046
  "type": "Distributed",
1047
  "shape": [
1048
- 3072,
1049
- 768
1050
  ],
1051
  "dtype": "F32",
1052
  "chunks": [
1053
  {
1054
  "offsets": [
1055
- 0,
1056
  0
1057
  ],
1058
  "shape": [
1059
- 1536,
1060
- 768
1061
  ],
1062
  "filename_index": 0
1063
  },
1064
  {
1065
  "offsets": [
1066
- 1536,
1067
- 0
1068
  ],
1069
  "shape": [
1070
- 1536,
1071
- 768
1072
  ],
1073
  "filename_index": 1
1074
  }
1075
  ]
1076
  },
1077
- "h.6.attn.c_proj.bias": {
1078
  "type": "Distributed",
1079
  "shape": [
1080
  768
@@ -1101,7 +1136,7 @@
1101
  }
1102
  ]
1103
  },
1104
- "h.8.ln_1.weight": {
1105
  "type": "Distributed",
1106
  "shape": [
1107
  768
@@ -1128,66 +1163,91 @@
1128
  }
1129
  ]
1130
  },
1131
- "h.10.attn.c_attn.bias": {
1132
  "type": "Distributed",
1133
  "shape": [
1134
- 2304
 
 
 
1135
  ],
1136
  "dtype": "F32",
1137
  "chunks": [
1138
  {
1139
  "offsets": [
 
 
 
1140
  0
1141
  ],
1142
  "shape": [
1143
- 1152
 
 
 
1144
  ],
1145
  "filename_index": 0
1146
  },
1147
  {
1148
  "offsets": [
1149
- 1152
 
 
 
1150
  ],
1151
  "shape": [
1152
- 1152
 
 
 
1153
  ],
1154
  "filename_index": 1
1155
  }
1156
  ]
1157
  },
1158
- "h.6.mlp.c_fc.weight": {
1159
  "type": "Distributed",
1160
  "shape": [
1161
- 768,
1162
- 3072
 
 
1163
  ],
1164
  "dtype": "F32",
1165
  "chunks": [
1166
  {
1167
  "offsets": [
 
 
1168
  0,
1169
  0
1170
  ],
1171
  "shape": [
1172
- 768,
1173
- 1536
 
 
1174
  ],
1175
  "filename_index": 0
1176
  },
1177
  {
1178
  "offsets": [
1179
  0,
1180
- 1536
 
 
1181
  ],
1182
  "shape": [
1183
- 768,
1184
- 1536
 
 
1185
  ],
1186
  "filename_index": 1
1187
  }
1188
  ]
1189
  },
1190
- "h.6.ln_2.bias": {
1191
  "type": "Distributed",
1192
  "shape": [
1193
  768
@@ -1214,34 +1274,39 @@
1214
  }
1215
  ]
1216
  },
1217
- "h.1.ln_2.weight": {
1218
  "type": "Distributed",
1219
  "shape": [
 
1220
  768
1221
  ],
1222
  "dtype": "F32",
1223
  "chunks": [
1224
  {
1225
  "offsets": [
 
1226
  0
1227
  ],
1228
  "shape": [
1229
- 384
 
1230
  ],
1231
  "filename_index": 0
1232
  },
1233
  {
1234
  "offsets": [
1235
- 384
 
1236
  ],
1237
  "shape": [
1238
- 384
 
1239
  ],
1240
  "filename_index": 1
1241
  }
1242
  ]
1243
  },
1244
- "h.0.ln_1.bias": {
1245
  "type": "Distributed",
1246
  "shape": [
1247
  768
@@ -1268,38 +1333,43 @@
1268
  }
1269
  ]
1270
  },
1271
- "h.4.ln_2.bias": {
1272
  "type": "Distributed",
1273
  "shape": [
 
1274
  768
1275
  ],
1276
  "dtype": "F32",
1277
  "chunks": [
1278
  {
1279
  "offsets": [
 
1280
  0
1281
  ],
1282
  "shape": [
1283
- 384
 
1284
  ],
1285
  "filename_index": 0
1286
  },
1287
  {
1288
  "offsets": [
1289
- 384
 
1290
  ],
1291
  "shape": [
1292
- 384
 
1293
  ],
1294
  "filename_index": 1
1295
  }
1296
  ]
1297
  },
1298
- "wte.weight": {
1299
  "type": "Distributed",
1300
  "shape": [
1301
- 50257,
1302
- 768
1303
  ],
1304
  "dtype": "F32",
1305
  "chunks": [
@@ -1309,25 +1379,25 @@
1309
  0
1310
  ],
1311
  "shape": [
1312
- 50257,
1313
- 384
1314
  ],
1315
  "filename_index": 0
1316
  },
1317
  {
1318
  "offsets": [
1319
  0,
1320
- 384
1321
  ],
1322
  "shape": [
1323
- 50257,
1324
- 384
1325
  ],
1326
  "filename_index": 1
1327
  }
1328
  ]
1329
  },
1330
- "h.3.ln_1.bias": {
1331
  "type": "Distributed",
1332
  "shape": [
1333
  768
@@ -1354,11 +1424,11 @@
1354
  }
1355
  ]
1356
  },
1357
- "h.6.attn.c_proj.weight": {
1358
  "type": "Distributed",
1359
  "shape": [
1360
  768,
1361
- 768
1362
  ],
1363
  "dtype": "F32",
1364
  "chunks": [
@@ -1368,52 +1438,57 @@
1368
  0
1369
  ],
1370
  "shape": [
1371
- 384,
1372
- 768
1373
  ],
1374
  "filename_index": 0
1375
  },
1376
  {
1377
  "offsets": [
1378
- 384,
1379
- 0
1380
  ],
1381
  "shape": [
1382
- 384,
1383
- 768
1384
  ],
1385
  "filename_index": 1
1386
  }
1387
  ]
1388
  },
1389
- "h.2.ln_2.bias": {
1390
  "type": "Distributed",
1391
  "shape": [
 
1392
  768
1393
  ],
1394
  "dtype": "F32",
1395
  "chunks": [
1396
  {
1397
  "offsets": [
 
1398
  0
1399
  ],
1400
  "shape": [
1401
- 384
 
1402
  ],
1403
  "filename_index": 0
1404
  },
1405
  {
1406
  "offsets": [
1407
- 384
 
1408
  ],
1409
  "shape": [
1410
- 384
 
1411
  ],
1412
  "filename_index": 1
1413
  }
1414
  ]
1415
  },
1416
- "h.7.ln_2.bias": {
1417
  "type": "Distributed",
1418
  "shape": [
1419
  768
@@ -1440,7 +1515,7 @@
1440
  }
1441
  ]
1442
  },
1443
- "h.4.mlp.c_proj.bias": {
1444
  "type": "Distributed",
1445
  "shape": [
1446
  768
@@ -1467,148 +1542,96 @@
1467
  }
1468
  ]
1469
  },
1470
- "h.7.mlp.c_proj.weight": {
1471
  "type": "Distributed",
1472
  "shape": [
1473
- 3072,
1474
- 768
1475
  ],
1476
  "dtype": "F32",
1477
  "chunks": [
1478
  {
1479
  "offsets": [
1480
- 0,
1481
  0
1482
  ],
1483
  "shape": [
1484
- 1536,
1485
- 768
1486
  ],
1487
  "filename_index": 0
1488
  },
1489
  {
1490
  "offsets": [
1491
- 1536,
1492
- 0
1493
  ],
1494
  "shape": [
1495
- 1536,
1496
- 768
1497
  ],
1498
  "filename_index": 1
1499
  }
1500
  ]
1501
  },
1502
- "h.3.attn.c_proj.weight": {
1503
  "type": "Distributed",
1504
  "shape": [
1505
  768,
1506
- 768
1507
- ],
1508
- "dtype": "F32",
1509
- "chunks": [
1510
- {
1511
- "offsets": [
1512
- 0,
1513
- 0
1514
- ],
1515
- "shape": [
1516
- 384,
1517
- 768
1518
- ],
1519
- "filename_index": 0
1520
- },
1521
- {
1522
- "offsets": [
1523
- 384,
1524
- 0
1525
- ],
1526
- "shape": [
1527
- 384,
1528
- 768
1529
- ],
1530
- "filename_index": 1
1531
- }
1532
- ]
1533
- },
1534
- "h.5.attn.bias": {
1535
- "type": "Distributed",
1536
- "shape": [
1537
- 1,
1538
- 1,
1539
- 1024,
1540
- 1024
1541
  ],
1542
  "dtype": "F32",
1543
  "chunks": [
1544
  {
1545
  "offsets": [
1546
- 0,
1547
- 0,
1548
  0,
1549
  0
1550
  ],
1551
  "shape": [
1552
- 1,
1553
- 1,
1554
- 1024,
1555
- 512
1556
  ],
1557
  "filename_index": 0
1558
  },
1559
  {
1560
  "offsets": [
1561
  0,
1562
- 0,
1563
- 0,
1564
- 512
1565
  ],
1566
  "shape": [
1567
- 1,
1568
- 1,
1569
- 1024,
1570
- 512
1571
  ],
1572
  "filename_index": 1
1573
  }
1574
  ]
1575
  },
1576
- "h.8.mlp.c_proj.weight": {
1577
  "type": "Distributed",
1578
  "shape": [
1579
- 3072,
1580
  768
1581
  ],
1582
  "dtype": "F32",
1583
  "chunks": [
1584
  {
1585
  "offsets": [
1586
- 0,
1587
  0
1588
  ],
1589
  "shape": [
1590
- 1536,
1591
- 768
1592
  ],
1593
  "filename_index": 0
1594
  },
1595
  {
1596
  "offsets": [
1597
- 1536,
1598
- 0
1599
  ],
1600
  "shape": [
1601
- 1536,
1602
- 768
1603
  ],
1604
  "filename_index": 1
1605
  }
1606
  ]
1607
  },
1608
- "h.4.ln_2.weight": {
1609
  "type": "Distributed",
1610
  "shape": [
1611
- 768
1612
  ],
1613
  "dtype": "F32",
1614
  "chunks": [
@@ -1617,22 +1640,22 @@
1617
  0
1618
  ],
1619
  "shape": [
1620
- 384
1621
  ],
1622
  "filename_index": 0
1623
  },
1624
  {
1625
  "offsets": [
1626
- 384
1627
  ],
1628
  "shape": [
1629
- 384
1630
  ],
1631
  "filename_index": 1
1632
  }
1633
  ]
1634
  },
1635
- "h.10.attn.c_proj.weight": {
1636
  "type": "Distributed",
1637
  "shape": [
1638
  768,
@@ -1664,10 +1687,10 @@
1664
  }
1665
  ]
1666
  },
1667
- "h.0.ln_2.weight": {
1668
  "type": "Distributed",
1669
  "shape": [
1670
- 768
1671
  ],
1672
  "dtype": "F32",
1673
  "chunks": [
@@ -1676,76 +1699,86 @@
1676
  0
1677
  ],
1678
  "shape": [
1679
- 384
1680
  ],
1681
  "filename_index": 0
1682
  },
1683
  {
1684
  "offsets": [
1685
- 384
1686
  ],
1687
  "shape": [
1688
- 384
1689
  ],
1690
  "filename_index": 1
1691
  }
1692
  ]
1693
  },
1694
- "h.4.ln_1.bias": {
1695
  "type": "Distributed",
1696
  "shape": [
 
1697
  768
1698
  ],
1699
  "dtype": "F32",
1700
  "chunks": [
1701
  {
1702
  "offsets": [
 
1703
  0
1704
  ],
1705
  "shape": [
1706
- 384
 
1707
  ],
1708
  "filename_index": 0
1709
  },
1710
  {
1711
  "offsets": [
1712
- 384
 
1713
  ],
1714
  "shape": [
1715
- 384
 
1716
  ],
1717
  "filename_index": 1
1718
  }
1719
  ]
1720
  },
1721
- "h.7.ln_1.weight": {
1722
  "type": "Distributed",
1723
  "shape": [
 
1724
  768
1725
  ],
1726
  "dtype": "F32",
1727
  "chunks": [
1728
  {
1729
  "offsets": [
 
1730
  0
1731
  ],
1732
  "shape": [
 
1733
  384
1734
  ],
1735
  "filename_index": 0
1736
  },
1737
  {
1738
  "offsets": [
 
1739
  384
1740
  ],
1741
  "shape": [
 
1742
  384
1743
  ],
1744
  "filename_index": 1
1745
  }
1746
  ]
1747
  },
1748
- "h.5.mlp.c_proj.bias": {
1749
  "type": "Distributed",
1750
  "shape": [
1751
  768
@@ -1772,34 +1805,39 @@
1772
  }
1773
  ]
1774
  },
1775
- "h.1.mlp.c_fc.bias": {
1776
  "type": "Distributed",
1777
  "shape": [
1778
- 3072
 
1779
  ],
1780
  "dtype": "F32",
1781
  "chunks": [
1782
  {
1783
  "offsets": [
 
1784
  0
1785
  ],
1786
  "shape": [
1787
- 1536
 
1788
  ],
1789
  "filename_index": 0
1790
  },
1791
  {
1792
  "offsets": [
1793
- 1536
 
1794
  ],
1795
  "shape": [
1796
- 1536
 
1797
  ],
1798
  "filename_index": 1
1799
  }
1800
  ]
1801
  },
1802
- "h.1.mlp.c_proj.bias": {
1803
  "type": "Distributed",
1804
  "shape": [
1805
  768
@@ -1826,7 +1864,7 @@
1826
  }
1827
  ]
1828
  },
1829
- "h.10.ln_1.weight": {
1830
  "type": "Distributed",
1831
  "shape": [
1832
  768
@@ -1853,102 +1891,112 @@
1853
  }
1854
  ]
1855
  },
1856
- "h.3.attn.c_attn.weight": {
1857
  "type": "Distributed",
1858
  "shape": [
1859
- 768,
1860
- 2304
1861
  ],
1862
  "dtype": "F32",
1863
  "chunks": [
1864
  {
1865
  "offsets": [
1866
- 0,
1867
  0
1868
  ],
1869
  "shape": [
1870
- 768,
1871
- 1152
1872
  ],
1873
  "filename_index": 0
1874
  },
1875
  {
1876
  "offsets": [
1877
- 0,
1878
- 1152
1879
  ],
1880
  "shape": [
1881
- 768,
1882
- 1152
1883
  ],
1884
  "filename_index": 1
1885
  }
1886
  ]
1887
  },
1888
- "h.1.attn.c_proj.bias": {
1889
  "type": "Distributed",
1890
  "shape": [
 
1891
  768
1892
  ],
1893
  "dtype": "F32",
1894
  "chunks": [
1895
  {
1896
  "offsets": [
 
1897
  0
1898
  ],
1899
  "shape": [
1900
- 384
 
1901
  ],
1902
  "filename_index": 0
1903
  },
1904
  {
1905
  "offsets": [
1906
- 384
 
1907
  ],
1908
  "shape": [
1909
- 384
 
1910
  ],
1911
  "filename_index": 1
1912
  }
1913
  ]
1914
  },
1915
- "h.8.attn.c_proj.weight": {
1916
  "type": "Distributed",
1917
  "shape": [
1918
- 768,
1919
- 768
 
 
1920
  ],
1921
  "dtype": "F32",
1922
  "chunks": [
1923
  {
1924
  "offsets": [
 
 
1925
  0,
1926
  0
1927
  ],
1928
  "shape": [
1929
- 384,
1930
- 768
 
 
1931
  ],
1932
  "filename_index": 0
1933
  },
1934
  {
1935
  "offsets": [
1936
- 384,
1937
- 0
 
 
1938
  ],
1939
  "shape": [
1940
- 384,
1941
- 768
 
 
1942
  ],
1943
  "filename_index": 1
1944
  }
1945
  ]
1946
  },
1947
- "h.8.mlp.c_fc.weight": {
1948
  "type": "Distributed",
1949
  "shape": [
1950
  768,
1951
- 3072
1952
  ],
1953
  "dtype": "F32",
1954
  "chunks": [
@@ -1959,27 +2007,27 @@
1959
  ],
1960
  "shape": [
1961
  768,
1962
- 1536
1963
  ],
1964
  "filename_index": 0
1965
  },
1966
  {
1967
  "offsets": [
1968
  0,
1969
- 1536
1970
  ],
1971
  "shape": [
1972
  768,
1973
- 1536
1974
  ],
1975
  "filename_index": 1
1976
  }
1977
  ]
1978
  },
1979
- "h.6.mlp.c_fc.bias": {
1980
  "type": "Distributed",
1981
  "shape": [
1982
- 3072
1983
  ],
1984
  "dtype": "F32",
1985
  "chunks": [
@@ -1988,49 +2036,64 @@
1988
  0
1989
  ],
1990
  "shape": [
1991
- 1536
1992
  ],
1993
  "filename_index": 0
1994
  },
1995
  {
1996
  "offsets": [
1997
- 1536
1998
  ],
1999
  "shape": [
2000
- 1536
2001
  ],
2002
  "filename_index": 1
2003
  }
2004
  ]
2005
  },
2006
- "h.7.mlp.c_proj.bias": {
2007
  "type": "Distributed",
2008
  "shape": [
2009
- 768
 
 
 
2010
  ],
2011
  "dtype": "F32",
2012
  "chunks": [
2013
  {
2014
  "offsets": [
 
 
 
2015
  0
2016
  ],
2017
  "shape": [
2018
- 384
 
 
 
2019
  ],
2020
  "filename_index": 0
2021
  },
2022
  {
2023
  "offsets": [
2024
- 384
 
 
 
2025
  ],
2026
  "shape": [
2027
- 384
 
 
 
2028
  ],
2029
  "filename_index": 1
2030
  }
2031
  ]
2032
  },
2033
- "h.0.mlp.c_fc.weight": {
2034
  "type": "Distributed",
2035
  "shape": [
2036
  768,
@@ -2062,11 +2125,11 @@
2062
  }
2063
  ]
2064
  },
2065
- "h.11.mlp.c_proj.weight": {
2066
  "type": "Distributed",
2067
  "shape": [
2068
- 3072,
2069
- 768
2070
  ],
2071
  "dtype": "F32",
2072
  "chunks": [
@@ -2076,57 +2139,79 @@
2076
  0
2077
  ],
2078
  "shape": [
2079
- 1536,
2080
- 768
2081
  ],
2082
  "filename_index": 0
2083
  },
2084
  {
2085
  "offsets": [
2086
- 1536,
2087
- 0
2088
  ],
2089
  "shape": [
2090
- 1536,
2091
- 768
2092
  ],
2093
  "filename_index": 1
2094
  }
2095
  ]
2096
  },
2097
- "h.9.attn.c_proj.weight": {
2098
  "type": "Distributed",
2099
  "shape": [
2100
- 768,
2101
  768
2102
  ],
2103
  "dtype": "F32",
2104
  "chunks": [
2105
  {
2106
  "offsets": [
2107
- 0,
2108
  0
2109
  ],
2110
  "shape": [
2111
- 384,
2112
- 768
2113
  ],
2114
  "filename_index": 0
2115
  },
2116
  {
2117
  "offsets": [
2118
- 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2119
  0
2120
  ],
2121
  "shape": [
2122
- 384,
2123
- 768
 
 
 
 
 
 
 
 
2124
  ],
2125
  "filename_index": 1
2126
  }
2127
  ]
2128
  },
2129
- "h.3.ln_2.bias": {
2130
  "type": "Distributed",
2131
  "shape": [
2132
  768
@@ -2153,39 +2238,49 @@
2153
  }
2154
  ]
2155
  },
2156
- "h.7.mlp.c_fc.weight": {
2157
  "type": "Distributed",
2158
  "shape": [
2159
- 768,
2160
- 3072
 
 
2161
  ],
2162
  "dtype": "F32",
2163
  "chunks": [
2164
  {
2165
  "offsets": [
 
 
2166
  0,
2167
  0
2168
  ],
2169
  "shape": [
2170
- 768,
2171
- 1536
 
 
2172
  ],
2173
  "filename_index": 0
2174
  },
2175
  {
2176
  "offsets": [
2177
  0,
2178
- 1536
 
 
2179
  ],
2180
  "shape": [
2181
- 768,
2182
- 1536
 
 
2183
  ],
2184
  "filename_index": 1
2185
  }
2186
  ]
2187
  },
2188
- "h.0.attn.c_proj.bias": {
2189
  "type": "Distributed",
2190
  "shape": [
2191
  768
@@ -2212,7 +2307,7 @@
2212
  }
2213
  ]
2214
  },
2215
- "h.1.ln_2.bias": {
2216
  "type": "Distributed",
2217
  "shape": [
2218
  768
@@ -2239,71 +2334,61 @@
2239
  }
2240
  ]
2241
  },
2242
- "wpe.weight": {
2243
  "type": "Distributed",
2244
  "shape": [
2245
- 1024,
2246
  768
2247
  ],
2248
  "dtype": "F32",
2249
  "chunks": [
2250
  {
2251
  "offsets": [
2252
- 0,
2253
  0
2254
  ],
2255
  "shape": [
2256
- 1024,
2257
  384
2258
  ],
2259
  "filename_index": 0
2260
  },
2261
  {
2262
  "offsets": [
2263
- 0,
2264
  384
2265
  ],
2266
  "shape": [
2267
- 1024,
2268
  384
2269
  ],
2270
  "filename_index": 1
2271
  }
2272
  ]
2273
  },
2274
- "h.9.mlp.c_fc.weight": {
2275
  "type": "Distributed",
2276
  "shape": [
2277
- 768,
2278
- 3072
2279
  ],
2280
  "dtype": "F32",
2281
  "chunks": [
2282
  {
2283
  "offsets": [
2284
- 0,
2285
  0
2286
  ],
2287
  "shape": [
2288
- 768,
2289
- 1536
2290
  ],
2291
  "filename_index": 0
2292
  },
2293
  {
2294
  "offsets": [
2295
- 0,
2296
- 1536
2297
  ],
2298
  "shape": [
2299
- 768,
2300
- 1536
2301
  ],
2302
  "filename_index": 1
2303
  }
2304
  ]
2305
  },
2306
- "h.5.ln_2.weight": {
2307
  "type": "Distributed",
2308
  "shape": [
2309
  768
@@ -2330,10 +2415,10 @@
2330
  }
2331
  ]
2332
  },
2333
- "h.6.attn.c_attn.bias": {
2334
  "type": "Distributed",
2335
  "shape": [
2336
- 2304
2337
  ],
2338
  "dtype": "F32",
2339
  "chunks": [
@@ -2342,94 +2427,89 @@
2342
  0
2343
  ],
2344
  "shape": [
2345
- 1152
2346
  ],
2347
  "filename_index": 0
2348
  },
2349
  {
2350
  "offsets": [
2351
- 1152
2352
  ],
2353
  "shape": [
2354
- 1152
2355
  ],
2356
  "filename_index": 1
2357
  }
2358
  ]
2359
  },
2360
- "h.3.mlp.c_fc.bias": {
2361
  "type": "Distributed",
2362
  "shape": [
2363
- 3072
 
2364
  ],
2365
  "dtype": "F32",
2366
  "chunks": [
2367
  {
2368
  "offsets": [
 
2369
  0
2370
  ],
2371
  "shape": [
2372
- 1536
 
2373
  ],
2374
  "filename_index": 0
2375
  },
2376
  {
2377
  "offsets": [
2378
- 1536
 
2379
  ],
2380
  "shape": [
2381
- 1536
 
2382
  ],
2383
  "filename_index": 1
2384
  }
2385
  ]
2386
  },
2387
- "h.11.attn.bias": {
2388
  "type": "Distributed",
2389
  "shape": [
2390
- 1,
2391
- 1,
2392
- 1024,
2393
- 1024
2394
  ],
2395
  "dtype": "F32",
2396
  "chunks": [
2397
  {
2398
  "offsets": [
2399
- 0,
2400
- 0,
2401
  0,
2402
  0
2403
  ],
2404
  "shape": [
2405
- 1,
2406
- 1,
2407
- 1024,
2408
- 512
2409
  ],
2410
  "filename_index": 0
2411
  },
2412
  {
2413
  "offsets": [
2414
  0,
2415
- 0,
2416
- 0,
2417
- 512
2418
  ],
2419
  "shape": [
2420
- 1,
2421
- 1,
2422
- 1024,
2423
- 512
2424
  ],
2425
  "filename_index": 1
2426
  }
2427
  ]
2428
  },
2429
- "h.0.mlp.c_proj.weight": {
2430
  "type": "Distributed",
2431
  "shape": [
2432
- 3072,
2433
  768
2434
  ],
2435
  "dtype": "F32",
@@ -2440,70 +2520,55 @@
2440
  0
2441
  ],
2442
  "shape": [
2443
- 1536,
2444
  768
2445
  ],
2446
  "filename_index": 0
2447
  },
2448
  {
2449
  "offsets": [
2450
- 1536,
2451
  0
2452
  ],
2453
  "shape": [
2454
- 1536,
2455
  768
2456
  ],
2457
  "filename_index": 1
2458
  }
2459
  ]
2460
  },
2461
- "h.4.attn.bias": {
2462
  "type": "Distributed",
2463
  "shape": [
2464
- 1,
2465
- 1,
2466
- 1024,
2467
- 1024
2468
  ],
2469
  "dtype": "F32",
2470
  "chunks": [
2471
  {
2472
  "offsets": [
2473
- 0,
2474
- 0,
2475
- 0,
2476
  0
2477
  ],
2478
  "shape": [
2479
- 1,
2480
- 1,
2481
- 1024,
2482
- 512
2483
  ],
2484
  "filename_index": 0
2485
  },
2486
  {
2487
  "offsets": [
2488
- 0,
2489
- 0,
2490
- 0,
2491
- 512
2492
  ],
2493
  "shape": [
2494
- 1,
2495
- 1,
2496
- 1024,
2497
- 512
2498
  ],
2499
  "filename_index": 1
2500
  }
2501
  ]
2502
  },
2503
- "h.10.mlp.c_proj.bias": {
2504
  "type": "Distributed",
2505
  "shape": [
2506
- 768
2507
  ],
2508
  "dtype": "F32",
2509
  "chunks": [
@@ -2512,22 +2577,22 @@
2512
  0
2513
  ],
2514
  "shape": [
2515
- 384
2516
  ],
2517
  "filename_index": 0
2518
  },
2519
  {
2520
  "offsets": [
2521
- 384
2522
  ],
2523
  "shape": [
2524
- 384
2525
  ],
2526
  "filename_index": 1
2527
  }
2528
  ]
2529
  },
2530
- "h.5.ln_1.bias": {
2531
  "type": "Distributed",
2532
  "shape": [
2533
  768
@@ -2554,7 +2619,7 @@
2554
  }
2555
  ]
2556
  },
2557
- "h.1.mlp.c_proj.weight": {
2558
  "type": "Distributed",
2559
  "shape": [
2560
  3072,
@@ -2586,7 +2651,7 @@
2586
  }
2587
  ]
2588
  },
2589
- "h.6.ln_2.weight": {
2590
  "type": "Distributed",
2591
  "shape": [
2592
  768
@@ -2613,7 +2678,7 @@
2613
  }
2614
  ]
2615
  },
2616
- "h.8.ln_1.bias": {
2617
  "type": "Distributed",
2618
  "shape": [
2619
  768
@@ -2640,7 +2705,7 @@
2640
  }
2641
  ]
2642
  },
2643
- "h.8.ln_2.bias": {
2644
  "type": "Distributed",
2645
  "shape": [
2646
  768
@@ -2667,84 +2732,64 @@
2667
  }
2668
  ]
2669
  },
2670
- "h.7.attn.c_proj.weight": {
2671
  "type": "Distributed",
2672
  "shape": [
2673
- 768,
2674
- 768
2675
  ],
2676
  "dtype": "F32",
2677
  "chunks": [
2678
  {
2679
  "offsets": [
2680
- 0,
2681
  0
2682
  ],
2683
  "shape": [
2684
- 384,
2685
- 768
2686
  ],
2687
  "filename_index": 0
2688
  },
2689
  {
2690
  "offsets": [
2691
- 384,
2692
- 0
2693
  ],
2694
  "shape": [
2695
- 384,
2696
- 768
2697
  ],
2698
  "filename_index": 1
2699
  }
2700
  ]
2701
  },
2702
- "h.7.attn.bias": {
2703
  "type": "Distributed",
2704
  "shape": [
2705
- 1,
2706
- 1,
2707
- 1024,
2708
- 1024
2709
  ],
2710
  "dtype": "F32",
2711
  "chunks": [
2712
  {
2713
  "offsets": [
2714
- 0,
2715
- 0,
2716
- 0,
2717
  0
2718
  ],
2719
  "shape": [
2720
- 1,
2721
- 1,
2722
- 1024,
2723
- 512
2724
  ],
2725
  "filename_index": 0
2726
  },
2727
  {
2728
  "offsets": [
2729
- 0,
2730
- 0,
2731
- 0,
2732
- 512
2733
  ],
2734
  "shape": [
2735
- 1,
2736
- 1,
2737
- 1024,
2738
- 512
2739
  ],
2740
  "filename_index": 1
2741
  }
2742
  ]
2743
  },
2744
- "h.1.attn.c_proj.weight": {
2745
  "type": "Distributed",
2746
  "shape": [
2747
- 768,
2748
  768
2749
  ],
2750
  "dtype": "F32",
@@ -2755,25 +2800,25 @@
2755
  0
2756
  ],
2757
  "shape": [
2758
- 384,
2759
  768
2760
  ],
2761
  "filename_index": 0
2762
  },
2763
  {
2764
  "offsets": [
2765
- 384,
2766
  0
2767
  ],
2768
  "shape": [
2769
- 384,
2770
  768
2771
  ],
2772
  "filename_index": 1
2773
  }
2774
  ]
2775
  },
2776
- "h.7.attn.c_attn.bias": {
2777
  "type": "Distributed",
2778
  "shape": [
2779
  2304
@@ -2800,39 +2845,34 @@
2800
  }
2801
  ]
2802
  },
2803
- "h.6.attn.c_attn.weight": {
2804
  "type": "Distributed",
2805
  "shape": [
2806
- 768,
2807
- 2304
2808
  ],
2809
  "dtype": "F32",
2810
  "chunks": [
2811
  {
2812
  "offsets": [
2813
- 0,
2814
  0
2815
  ],
2816
  "shape": [
2817
- 768,
2818
- 1152
2819
  ],
2820
  "filename_index": 0
2821
  },
2822
  {
2823
  "offsets": [
2824
- 0,
2825
- 1152
2826
  ],
2827
  "shape": [
2828
- 768,
2829
- 1152
2830
  ],
2831
  "filename_index": 1
2832
  }
2833
  ]
2834
  },
2835
- "h.11.ln_2.weight": {
2836
  "type": "Distributed",
2837
  "shape": [
2838
  768
@@ -2859,7 +2899,7 @@
2859
  }
2860
  ]
2861
  },
2862
- "h.3.attn.c_proj.bias": {
2863
  "type": "Distributed",
2864
  "shape": [
2865
  768
@@ -2886,10 +2926,10 @@
2886
  }
2887
  ]
2888
  },
2889
- "h.9.ln_2.bias": {
2890
  "type": "Distributed",
2891
  "shape": [
2892
- 768
2893
  ],
2894
  "dtype": "F32",
2895
  "chunks": [
@@ -2898,22 +2938,22 @@
2898
  0
2899
  ],
2900
  "shape": [
2901
- 384
2902
  ],
2903
  "filename_index": 0
2904
  },
2905
  {
2906
  "offsets": [
2907
- 384
2908
  ],
2909
  "shape": [
2910
- 384
2911
  ],
2912
  "filename_index": 1
2913
  }
2914
  ]
2915
  },
2916
- "h.9.attn.c_proj.bias": {
2917
  "type": "Distributed",
2918
  "shape": [
2919
  768
@@ -2940,10 +2980,10 @@
2940
  }
2941
  ]
2942
  },
2943
- "h.5.mlp.c_fc.bias": {
2944
  "type": "Distributed",
2945
  "shape": [
2946
- 3072
2947
  ],
2948
  "dtype": "F32",
2949
  "chunks": [
@@ -2952,99 +2992,79 @@
2952
  0
2953
  ],
2954
  "shape": [
2955
- 1536
2956
  ],
2957
  "filename_index": 0
2958
  },
2959
  {
2960
  "offsets": [
2961
- 1536
2962
  ],
2963
  "shape": [
2964
- 1536
2965
  ],
2966
  "filename_index": 1
2967
  }
2968
  ]
2969
  },
2970
- "h.2.attn.c_attn.weight": {
2971
  "type": "Distributed",
2972
  "shape": [
2973
- 768,
2974
- 2304
2975
  ],
2976
  "dtype": "F32",
2977
  "chunks": [
2978
  {
2979
  "offsets": [
2980
- 0,
2981
  0
2982
  ],
2983
  "shape": [
2984
- 768,
2985
- 1152
2986
  ],
2987
  "filename_index": 0
2988
  },
2989
  {
2990
  "offsets": [
2991
- 0,
2992
- 1152
2993
  ],
2994
  "shape": [
2995
- 768,
2996
- 1152
2997
  ],
2998
  "filename_index": 1
2999
  }
3000
  ]
3001
  },
3002
- "h.10.attn.bias": {
3003
  "type": "Distributed",
3004
  "shape": [
3005
- 1,
3006
- 1,
3007
- 1024,
3008
- 1024
3009
  ],
3010
  "dtype": "F32",
3011
  "chunks": [
3012
  {
3013
  "offsets": [
3014
- 0,
3015
- 0,
3016
- 0,
3017
  0
3018
  ],
3019
  "shape": [
3020
- 1,
3021
- 1,
3022
- 1024,
3023
- 512
3024
  ],
3025
  "filename_index": 0
3026
  },
3027
  {
3028
  "offsets": [
3029
- 0,
3030
- 0,
3031
- 0,
3032
- 512
3033
  ],
3034
  "shape": [
3035
- 1,
3036
- 1,
3037
- 1024,
3038
- 512
3039
  ],
3040
  "filename_index": 1
3041
  }
3042
  ]
3043
  },
3044
- "h.9.mlp.c_proj.weight": {
3045
  "type": "Distributed",
3046
  "shape": [
3047
- 3072,
3048
  768
3049
  ],
3050
  "dtype": "F32",
@@ -3055,89 +3075,79 @@
3055
  0
3056
  ],
3057
  "shape": [
3058
- 1536,
3059
  768
3060
  ],
3061
  "filename_index": 0
3062
  },
3063
  {
3064
  "offsets": [
3065
- 1536,
3066
  0
3067
  ],
3068
  "shape": [
3069
- 1536,
3070
  768
3071
  ],
3072
  "filename_index": 1
3073
  }
3074
  ]
3075
  },
3076
- "h.9.attn.c_attn.weight": {
3077
  "type": "Distributed",
3078
  "shape": [
3079
- 768,
3080
- 2304
3081
  ],
3082
  "dtype": "F32",
3083
  "chunks": [
3084
  {
3085
  "offsets": [
3086
- 0,
3087
  0
3088
  ],
3089
  "shape": [
3090
- 768,
3091
- 1152
3092
  ],
3093
  "filename_index": 0
3094
  },
3095
  {
3096
  "offsets": [
3097
- 0,
3098
- 1152
3099
  ],
3100
  "shape": [
3101
- 768,
3102
- 1152
3103
  ],
3104
  "filename_index": 1
3105
  }
3106
  ]
3107
  },
3108
- "h.5.mlp.c_fc.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
- 768,
3112
- 3072
3113
  ],
3114
  "dtype": "F32",
3115
  "chunks": [
3116
  {
3117
  "offsets": [
3118
- 0,
3119
  0
3120
  ],
3121
  "shape": [
3122
- 768,
3123
- 1536
3124
  ],
3125
  "filename_index": 0
3126
  },
3127
  {
3128
  "offsets": [
3129
- 0,
3130
- 1536
3131
  ],
3132
  "shape": [
3133
- 768,
3134
- 1536
3135
  ],
3136
  "filename_index": 1
3137
  }
3138
  ]
3139
  },
3140
- "h.4.ln_1.weight": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
  768
@@ -3164,61 +3174,71 @@
3164
  }
3165
  ]
3166
  },
3167
- "h.5.ln_1.weight": {
3168
  "type": "Distributed",
3169
  "shape": [
 
3170
  768
3171
  ],
3172
  "dtype": "F32",
3173
  "chunks": [
3174
  {
3175
  "offsets": [
 
3176
  0
3177
  ],
3178
  "shape": [
3179
- 384
 
3180
  ],
3181
  "filename_index": 0
3182
  },
3183
  {
3184
  "offsets": [
3185
- 384
 
3186
  ],
3187
  "shape": [
3188
- 384
 
3189
  ],
3190
  "filename_index": 1
3191
  }
3192
  ]
3193
  },
3194
- "h.3.ln_1.weight": {
3195
  "type": "Distributed",
3196
  "shape": [
 
3197
  768
3198
  ],
3199
  "dtype": "F32",
3200
  "chunks": [
3201
  {
3202
  "offsets": [
 
3203
  0
3204
  ],
3205
  "shape": [
3206
- 384
 
3207
  ],
3208
  "filename_index": 0
3209
  },
3210
  {
3211
  "offsets": [
3212
- 384
 
3213
  ],
3214
  "shape": [
3215
- 384
 
3216
  ],
3217
  "filename_index": 1
3218
  }
3219
  ]
3220
  },
3221
- "h.11.attn.c_proj.bias": {
3222
  "type": "Distributed",
3223
  "shape": [
3224
  768
@@ -3245,39 +3265,34 @@
3245
  }
3246
  ]
3247
  },
3248
- "h.11.attn.c_attn.weight": {
3249
  "type": "Distributed",
3250
  "shape": [
3251
- 768,
3252
- 2304
3253
  ],
3254
  "dtype": "F32",
3255
  "chunks": [
3256
  {
3257
  "offsets": [
3258
- 0,
3259
  0
3260
  ],
3261
  "shape": [
3262
- 768,
3263
- 1152
3264
  ],
3265
  "filename_index": 0
3266
  },
3267
  {
3268
  "offsets": [
3269
- 0,
3270
- 1152
3271
  ],
3272
  "shape": [
3273
- 768,
3274
- 1152
3275
  ],
3276
  "filename_index": 1
3277
  }
3278
  ]
3279
  },
3280
- "h.1.ln_1.bias": {
3281
  "type": "Distributed",
3282
  "shape": [
3283
  768
@@ -3304,88 +3319,113 @@
3304
  }
3305
  ]
3306
  },
3307
- "h.0.ln_2.bias": {
3308
  "type": "Distributed",
3309
  "shape": [
 
3310
  768
3311
  ],
3312
  "dtype": "F32",
3313
  "chunks": [
3314
  {
3315
  "offsets": [
 
3316
  0
3317
  ],
3318
  "shape": [
 
3319
  384
3320
  ],
3321
  "filename_index": 0
3322
  },
3323
  {
3324
  "offsets": [
 
3325
  384
3326
  ],
3327
  "shape": [
 
3328
  384
3329
  ],
3330
  "filename_index": 1
3331
  }
3332
  ]
3333
  },
3334
- "h.2.attn.c_attn.bias": {
3335
  "type": "Distributed",
3336
  "shape": [
3337
- 2304
 
3338
  ],
3339
  "dtype": "F32",
3340
  "chunks": [
3341
  {
3342
  "offsets": [
 
3343
  0
3344
  ],
3345
  "shape": [
3346
- 1152
 
3347
  ],
3348
  "filename_index": 0
3349
  },
3350
  {
3351
  "offsets": [
3352
- 1152
 
3353
  ],
3354
  "shape": [
3355
- 1152
 
3356
  ],
3357
  "filename_index": 1
3358
  }
3359
  ]
3360
  },
3361
- "h.11.ln_2.bias": {
3362
  "type": "Distributed",
3363
  "shape": [
3364
- 768
 
 
 
3365
  ],
3366
  "dtype": "F32",
3367
  "chunks": [
3368
  {
3369
  "offsets": [
 
 
 
3370
  0
3371
  ],
3372
  "shape": [
3373
- 384
 
 
 
3374
  ],
3375
  "filename_index": 0
3376
  },
3377
  {
3378
  "offsets": [
3379
- 384
 
 
 
3380
  ],
3381
  "shape": [
3382
- 384
 
 
 
3383
  ],
3384
  "filename_index": 1
3385
  }
3386
  ]
3387
  },
3388
- "h.4.attn.c_proj.bias": {
3389
  "type": "Distributed",
3390
  "shape": [
3391
  768
@@ -3412,37 +3452,10 @@
3412
  }
3413
  ]
3414
  },
3415
- "h.10.mlp.c_fc.bias": {
3416
- "type": "Distributed",
3417
- "shape": [
3418
- 3072
3419
- ],
3420
- "dtype": "F32",
3421
- "chunks": [
3422
- {
3423
- "offsets": [
3424
- 0
3425
- ],
3426
- "shape": [
3427
- 1536
3428
- ],
3429
- "filename_index": 0
3430
- },
3431
- {
3432
- "offsets": [
3433
- 1536
3434
- ],
3435
- "shape": [
3436
- 1536
3437
- ],
3438
- "filename_index": 1
3439
- }
3440
- ]
3441
- },
3442
- "h.11.mlp.c_proj.bias": {
3443
  "type": "Distributed",
3444
  "shape": [
3445
- 768
3446
  ],
3447
  "dtype": "F32",
3448
  "chunks": [
@@ -3451,22 +3464,22 @@
3451
  0
3452
  ],
3453
  "shape": [
3454
- 384
3455
  ],
3456
  "filename_index": 0
3457
  },
3458
  {
3459
  "offsets": [
3460
- 384
3461
  ],
3462
  "shape": [
3463
- 384
3464
  ],
3465
  "filename_index": 1
3466
  }
3467
  ]
3468
  },
3469
- "h.6.attn.bias": {
3470
  "type": "Distributed",
3471
  "shape": [
3472
  1,
@@ -3508,52 +3521,42 @@
3508
  }
3509
  ]
3510
  },
3511
- "h.0.attn.bias": {
3512
  "type": "Distributed",
3513
  "shape": [
3514
- 1,
3515
- 1,
3516
- 1024,
3517
- 1024
3518
  ],
3519
  "dtype": "F32",
3520
  "chunks": [
3521
  {
3522
  "offsets": [
3523
- 0,
3524
- 0,
3525
  0,
3526
  0
3527
  ],
3528
  "shape": [
3529
- 1,
3530
- 1,
3531
- 1024,
3532
- 512
3533
  ],
3534
  "filename_index": 0
3535
  },
3536
  {
3537
  "offsets": [
3538
  0,
3539
- 0,
3540
- 0,
3541
- 512
3542
  ],
3543
  "shape": [
3544
- 1,
3545
- 1,
3546
- 1024,
3547
- 512
3548
  ],
3549
  "filename_index": 1
3550
  }
3551
  ]
3552
  },
3553
- "h.7.mlp.c_fc.bias": {
3554
  "type": "Distributed",
3555
  "shape": [
3556
- 3072
3557
  ],
3558
  "dtype": "F32",
3559
  "chunks": [
@@ -3562,22 +3565,22 @@
3562
  0
3563
  ],
3564
  "shape": [
3565
- 1536
3566
  ],
3567
  "filename_index": 0
3568
  },
3569
  {
3570
  "offsets": [
3571
- 1536
3572
  ],
3573
  "shape": [
3574
- 1536
3575
  ],
3576
  "filename_index": 1
3577
  }
3578
  ]
3579
  },
3580
- "h.2.ln_1.weight": {
3581
  "type": "Distributed",
3582
  "shape": [
3583
  768
@@ -3604,130 +3607,135 @@
3604
  }
3605
  ]
3606
  },
3607
- "h.10.attn.c_proj.bias": {
3608
  "type": "Distributed",
3609
  "shape": [
3610
- 768
 
3611
  ],
3612
  "dtype": "F32",
3613
  "chunks": [
3614
  {
3615
  "offsets": [
 
3616
  0
3617
  ],
3618
  "shape": [
3619
- 384
 
3620
  ],
3621
  "filename_index": 0
3622
  },
3623
  {
3624
  "offsets": [
3625
- 384
 
3626
  ],
3627
  "shape": [
3628
- 384
 
3629
  ],
3630
  "filename_index": 1
3631
  }
3632
  ]
3633
  },
3634
- "h.1.attn.c_attn.weight": {
3635
  "type": "Distributed",
3636
  "shape": [
3637
- 768,
3638
- 2304
3639
  ],
3640
  "dtype": "F32",
3641
  "chunks": [
3642
  {
3643
  "offsets": [
3644
- 0,
3645
  0
3646
  ],
3647
  "shape": [
3648
- 768,
3649
- 1152
3650
  ],
3651
  "filename_index": 0
3652
  },
3653
  {
3654
  "offsets": [
3655
- 0,
3656
- 1152
3657
  ],
3658
  "shape": [
3659
- 768,
3660
- 1152
3661
  ],
3662
  "filename_index": 1
3663
  }
3664
  ]
3665
  },
3666
- "h.4.attn.c_attn.weight": {
3667
  "type": "Distributed",
3668
  "shape": [
3669
- 768,
3670
- 2304
3671
  ],
3672
  "dtype": "F32",
3673
  "chunks": [
3674
  {
3675
  "offsets": [
3676
- 0,
3677
  0
3678
  ],
3679
  "shape": [
3680
- 768,
3681
- 1152
3682
  ],
3683
  "filename_index": 0
3684
  },
3685
  {
3686
  "offsets": [
3687
- 0,
3688
- 1152
3689
  ],
3690
  "shape": [
3691
- 768,
3692
- 1152
3693
  ],
3694
  "filename_index": 1
3695
  }
3696
  ]
3697
  },
3698
- "h.4.mlp.c_fc.weight": {
3699
  "type": "Distributed",
3700
  "shape": [
3701
- 768,
3702
- 3072
 
 
3703
  ],
3704
  "dtype": "F32",
3705
  "chunks": [
3706
  {
3707
  "offsets": [
 
 
3708
  0,
3709
  0
3710
  ],
3711
  "shape": [
3712
- 768,
3713
- 1536
 
 
3714
  ],
3715
  "filename_index": 0
3716
  },
3717
  {
3718
  "offsets": [
3719
  0,
3720
- 1536
 
 
3721
  ],
3722
  "shape": [
3723
- 768,
3724
- 1536
 
 
3725
  ],
3726
  "filename_index": 1
3727
  }
3728
  ]
3729
  },
3730
- "h.9.ln_1.weight": {
3731
  "type": "Distributed",
3732
  "shape": [
3733
  768
@@ -3754,7 +3762,7 @@
3754
  }
3755
  ]
3756
  },
3757
- "h.5.ln_2.bias": {
3758
  "type": "Distributed",
3759
  "shape": [
3760
  768
@@ -3781,34 +3789,39 @@
3781
  }
3782
  ]
3783
  },
3784
- "h.8.ln_2.weight": {
3785
  "type": "Distributed",
3786
  "shape": [
3787
- 768
 
3788
  ],
3789
  "dtype": "F32",
3790
  "chunks": [
3791
  {
3792
  "offsets": [
 
3793
  0
3794
  ],
3795
  "shape": [
3796
- 384
 
3797
  ],
3798
  "filename_index": 0
3799
  },
3800
  {
3801
  "offsets": [
3802
- 384
 
3803
  ],
3804
  "shape": [
3805
- 384
 
3806
  ],
3807
  "filename_index": 1
3808
  }
3809
  ]
3810
  },
3811
- "h.2.ln_2.weight": {
3812
  "type": "Distributed",
3813
  "shape": [
3814
  768
@@ -3835,71 +3848,61 @@
3835
  }
3836
  ]
3837
  },
3838
- "h.2.attn.c_proj.weight": {
3839
  "type": "Distributed",
3840
  "shape": [
3841
- 768,
3842
  768
3843
  ],
3844
  "dtype": "F32",
3845
  "chunks": [
3846
  {
3847
  "offsets": [
3848
- 0,
3849
  0
3850
  ],
3851
  "shape": [
3852
- 384,
3853
- 768
3854
  ],
3855
  "filename_index": 0
3856
  },
3857
  {
3858
  "offsets": [
3859
- 384,
3860
- 0
3861
  ],
3862
  "shape": [
3863
- 384,
3864
- 768
3865
  ],
3866
  "filename_index": 1
3867
  }
3868
  ]
3869
  },
3870
- "h.0.attn.c_proj.weight": {
3871
  "type": "Distributed",
3872
  "shape": [
3873
- 768,
3874
  768
3875
  ],
3876
  "dtype": "F32",
3877
  "chunks": [
3878
  {
3879
  "offsets": [
3880
- 0,
3881
  0
3882
  ],
3883
  "shape": [
3884
- 384,
3885
- 768
3886
  ],
3887
  "filename_index": 0
3888
  },
3889
  {
3890
  "offsets": [
3891
- 384,
3892
- 0
3893
  ],
3894
  "shape": [
3895
- 384,
3896
- 768
3897
  ],
3898
  "filename_index": 1
3899
  }
3900
  ]
3901
  },
3902
- "h.8.mlp.c_proj.bias": {
3903
  "type": "Distributed",
3904
  "shape": [
3905
  768
@@ -3926,7 +3929,7 @@
3926
  }
3927
  ]
3928
  },
3929
- "h.7.ln_1.bias": {
3930
  "type": "Distributed",
3931
  "shape": [
3932
  768
@@ -3953,64 +3956,74 @@
3953
  }
3954
  ]
3955
  },
3956
- "h.3.mlp.c_proj.bias": {
3957
  "type": "Distributed",
3958
  "shape": [
 
3959
  768
3960
  ],
3961
  "dtype": "F32",
3962
  "chunks": [
3963
  {
3964
  "offsets": [
 
3965
  0
3966
  ],
3967
  "shape": [
3968
- 384
 
3969
  ],
3970
  "filename_index": 0
3971
  },
3972
  {
3973
  "offsets": [
3974
- 384
 
3975
  ],
3976
  "shape": [
3977
- 384
 
3978
  ],
3979
  "filename_index": 1
3980
  }
3981
  ]
3982
  },
3983
- "h.0.mlp.c_proj.bias": {
3984
  "type": "Distributed",
3985
  "shape": [
 
3986
  768
3987
  ],
3988
  "dtype": "F32",
3989
  "chunks": [
3990
  {
3991
  "offsets": [
 
3992
  0
3993
  ],
3994
  "shape": [
3995
- 384
 
3996
  ],
3997
  "filename_index": 0
3998
  },
3999
  {
4000
  "offsets": [
4001
- 384
 
4002
  ],
4003
  "shape": [
4004
- 384
 
4005
  ],
4006
  "filename_index": 1
4007
  }
4008
  ]
4009
  },
4010
- "h.8.attn.c_attn.bias": {
4011
  "type": "Distributed",
4012
  "shape": [
4013
- 2304
4014
  ],
4015
  "dtype": "F32",
4016
  "chunks": [
@@ -4019,64 +4032,49 @@
4019
  0
4020
  ],
4021
  "shape": [
4022
- 1152
4023
  ],
4024
  "filename_index": 0
4025
  },
4026
  {
4027
  "offsets": [
4028
- 1152
4029
  ],
4030
  "shape": [
4031
- 1152
4032
  ],
4033
  "filename_index": 1
4034
  }
4035
  ]
4036
  },
4037
- "h.3.attn.bias": {
4038
  "type": "Distributed",
4039
  "shape": [
4040
- 1,
4041
- 1,
4042
- 1024,
4043
- 1024
4044
  ],
4045
  "dtype": "F32",
4046
  "chunks": [
4047
  {
4048
  "offsets": [
4049
- 0,
4050
- 0,
4051
- 0,
4052
  0
4053
  ],
4054
  "shape": [
4055
- 1,
4056
- 1,
4057
- 1024,
4058
- 512
4059
  ],
4060
  "filename_index": 0
4061
  },
4062
  {
4063
  "offsets": [
4064
- 0,
4065
- 0,
4066
- 0,
4067
- 512
4068
  ],
4069
  "shape": [
4070
- 1,
4071
- 1,
4072
- 1024,
4073
- 512
4074
  ],
4075
  "filename_index": 1
4076
  }
4077
  ]
4078
  },
4079
- "h.6.ln_1.bias": {
4080
  "type": "Distributed",
4081
  "shape": [
4082
  768
@@ -4103,175 +4101,155 @@
4103
  }
4104
  ]
4105
  },
4106
- "h.7.attn.c_attn.weight": {
4107
  "type": "Distributed",
4108
  "shape": [
4109
- 768,
4110
  2304
4111
  ],
4112
  "dtype": "F32",
4113
  "chunks": [
4114
  {
4115
  "offsets": [
4116
- 0,
4117
  0
4118
  ],
4119
  "shape": [
4120
- 768,
4121
  1152
4122
  ],
4123
  "filename_index": 0
4124
  },
4125
  {
4126
  "offsets": [
4127
- 0,
4128
  1152
4129
  ],
4130
  "shape": [
4131
- 768,
4132
  1152
4133
  ],
4134
  "filename_index": 1
4135
  }
4136
  ]
4137
  },
4138
- "h.4.attn.c_proj.weight": {
4139
  "type": "Distributed",
4140
  "shape": [
4141
- 768,
4142
- 768
4143
  ],
4144
  "dtype": "F32",
4145
  "chunks": [
4146
  {
4147
  "offsets": [
4148
- 0,
4149
  0
4150
  ],
4151
  "shape": [
4152
- 384,
4153
- 768
4154
  ],
4155
  "filename_index": 0
4156
  },
4157
  {
4158
  "offsets": [
4159
- 384,
4160
- 0
4161
  ],
4162
  "shape": [
4163
- 384,
4164
- 768
4165
  ],
4166
  "filename_index": 1
4167
  }
4168
  ]
4169
  },
4170
- "h.6.mlp.c_proj.weight": {
4171
  "type": "Distributed",
4172
  "shape": [
4173
- 3072,
4174
- 768
4175
  ],
4176
  "dtype": "F32",
4177
  "chunks": [
4178
  {
4179
  "offsets": [
4180
- 0,
4181
  0
4182
  ],
4183
  "shape": [
4184
- 1536,
4185
- 768
4186
  ],
4187
  "filename_index": 0
4188
  },
4189
  {
4190
  "offsets": [
4191
- 1536,
4192
- 0
4193
  ],
4194
  "shape": [
4195
- 1536,
4196
- 768
4197
  ],
4198
  "filename_index": 1
4199
  }
4200
  ]
4201
  },
4202
- "h.8.attn.bias": {
4203
  "type": "Distributed",
4204
  "shape": [
4205
- 1,
4206
- 1,
4207
- 1024,
4208
- 1024
4209
  ],
4210
  "dtype": "F32",
4211
  "chunks": [
4212
  {
4213
  "offsets": [
4214
- 0,
4215
- 0,
4216
  0,
4217
  0
4218
  ],
4219
  "shape": [
4220
- 1,
4221
- 1,
4222
- 1024,
4223
- 512
4224
  ],
4225
  "filename_index": 0
4226
  },
4227
  {
4228
  "offsets": [
4229
  0,
4230
- 0,
4231
- 0,
4232
- 512
4233
  ],
4234
  "shape": [
4235
- 1,
4236
- 1,
4237
- 1024,
4238
- 512
4239
  ],
4240
  "filename_index": 1
4241
  }
4242
  ]
4243
  },
4244
- "h.4.mlp.c_fc.bias": {
4245
  "type": "Distributed",
4246
  "shape": [
4247
- 3072
 
4248
  ],
4249
  "dtype": "F32",
4250
  "chunks": [
4251
  {
4252
  "offsets": [
 
4253
  0
4254
  ],
4255
  "shape": [
4256
- 1536
 
4257
  ],
4258
  "filename_index": 0
4259
  },
4260
  {
4261
  "offsets": [
4262
- 1536
 
4263
  ],
4264
  "shape": [
4265
- 1536
 
4266
  ],
4267
  "filename_index": 1
4268
  }
4269
  ]
4270
  },
4271
- "h.3.attn.c_attn.bias": {
4272
  "type": "Distributed",
4273
  "shape": [
4274
- 2304
4275
  ],
4276
  "dtype": "F32",
4277
  "chunks": [
@@ -4280,22 +4258,22 @@
4280
  0
4281
  ],
4282
  "shape": [
4283
- 1152
4284
  ],
4285
  "filename_index": 0
4286
  },
4287
  {
4288
  "offsets": [
4289
- 1152
4290
  ],
4291
  "shape": [
4292
- 1152
4293
  ],
4294
  "filename_index": 1
4295
  }
4296
  ]
4297
  },
4298
- "h.5.mlp.c_proj.weight": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
  3072,
@@ -4327,39 +4305,34 @@
4327
  }
4328
  ]
4329
  },
4330
- "h.11.mlp.c_fc.weight": {
4331
  "type": "Distributed",
4332
  "shape": [
4333
- 768,
4334
- 3072
4335
  ],
4336
  "dtype": "F32",
4337
  "chunks": [
4338
  {
4339
  "offsets": [
4340
- 0,
4341
  0
4342
  ],
4343
  "shape": [
4344
- 768,
4345
- 1536
4346
  ],
4347
  "filename_index": 0
4348
  },
4349
  {
4350
  "offsets": [
4351
- 0,
4352
- 1536
4353
  ],
4354
  "shape": [
4355
- 768,
4356
- 1536
4357
  ],
4358
  "filename_index": 1
4359
  }
4360
  ]
4361
  },
4362
- "h.10.ln_2.weight": {
4363
  "type": "Distributed",
4364
  "shape": [
4365
  768
@@ -4386,10 +4359,10 @@
4386
  }
4387
  ]
4388
  },
4389
- "h.2.mlp.c_fc.bias": {
4390
  "type": "Distributed",
4391
  "shape": [
4392
- 3072
4393
  ],
4394
  "dtype": "F32",
4395
  "chunks": [
@@ -4398,22 +4371,22 @@
4398
  0
4399
  ],
4400
  "shape": [
4401
- 1536
4402
  ],
4403
  "filename_index": 0
4404
  },
4405
  {
4406
  "offsets": [
4407
- 1536
4408
  ],
4409
  "shape": [
4410
- 1536
4411
  ],
4412
  "filename_index": 1
4413
  }
4414
  ]
4415
  },
4416
- "h.2.ln_1.bias": {
4417
  "type": "Distributed",
4418
  "shape": [
4419
  768
@@ -4440,7 +4413,7 @@
4440
  }
4441
  ]
4442
  },
4443
- "ln_f.bias": {
4444
  "type": "Distributed",
4445
  "shape": [
4446
  768
@@ -4467,7 +4440,49 @@
4467
  }
4468
  ]
4469
  },
4470
- "h.9.mlp.c_proj.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4471
  "type": "Distributed",
4472
  "shape": [
4473
  768
@@ -4494,43 +4509,38 @@
4494
  }
4495
  ]
4496
  },
4497
- "h.3.mlp.c_fc.weight": {
4498
  "type": "Distributed",
4499
  "shape": [
4500
- 768,
4501
- 3072
4502
  ],
4503
  "dtype": "F32",
4504
  "chunks": [
4505
  {
4506
  "offsets": [
4507
- 0,
4508
  0
4509
  ],
4510
  "shape": [
4511
- 768,
4512
- 1536
4513
  ],
4514
  "filename_index": 0
4515
  },
4516
  {
4517
  "offsets": [
4518
- 0,
4519
- 1536
4520
  ],
4521
  "shape": [
4522
- 768,
4523
- 1536
4524
  ],
4525
  "filename_index": 1
4526
  }
4527
  ]
4528
  },
4529
- "h.11.attn.c_proj.weight": {
4530
  "type": "Distributed",
4531
  "shape": [
4532
  768,
4533
- 768
4534
  ],
4535
  "dtype": "F32",
4536
  "chunks": [
@@ -4540,84 +4550,94 @@
4540
  0
4541
  ],
4542
  "shape": [
4543
- 384,
4544
- 768
4545
  ],
4546
  "filename_index": 0
4547
  },
4548
  {
4549
  "offsets": [
4550
- 384,
4551
- 0
4552
  ],
4553
  "shape": [
4554
- 384,
4555
- 768
4556
  ],
4557
  "filename_index": 1
4558
  }
4559
  ]
4560
  },
4561
- "h.8.mlp.c_fc.bias": {
4562
  "type": "Distributed",
4563
  "shape": [
4564
- 3072
 
 
 
4565
  ],
4566
  "dtype": "F32",
4567
  "chunks": [
4568
  {
4569
  "offsets": [
 
 
 
4570
  0
4571
  ],
4572
  "shape": [
4573
- 1536
 
 
 
4574
  ],
4575
  "filename_index": 0
4576
  },
4577
  {
4578
  "offsets": [
4579
- 1536
 
 
 
4580
  ],
4581
  "shape": [
4582
- 1536
 
 
 
4583
  ],
4584
  "filename_index": 1
4585
  }
4586
  ]
4587
  },
4588
- "h.4.mlp.c_proj.weight": {
4589
  "type": "Distributed",
4590
  "shape": [
4591
- 3072,
4592
  768
4593
  ],
4594
  "dtype": "F32",
4595
  "chunks": [
4596
  {
4597
  "offsets": [
4598
- 0,
4599
  0
4600
  ],
4601
  "shape": [
4602
- 1536,
4603
- 768
4604
  ],
4605
  "filename_index": 0
4606
  },
4607
  {
4608
  "offsets": [
4609
- 1536,
4610
- 0
4611
  ],
4612
  "shape": [
4613
- 1536,
4614
- 768
4615
  ],
4616
  "filename_index": 1
4617
  }
4618
  ]
4619
  },
4620
- "h.10.attn.c_attn.weight": {
4621
  "type": "Distributed",
4622
  "shape": [
4623
  768,
@@ -4649,84 +4669,64 @@
4649
  }
4650
  ]
4651
  },
4652
- "h.5.attn.c_attn.weight": {
4653
  "type": "Distributed",
4654
  "shape": [
4655
- 768,
4656
- 2304
4657
  ],
4658
  "dtype": "F32",
4659
  "chunks": [
4660
  {
4661
  "offsets": [
4662
- 0,
4663
  0
4664
  ],
4665
  "shape": [
4666
- 768,
4667
- 1152
4668
  ],
4669
  "filename_index": 0
4670
  },
4671
  {
4672
  "offsets": [
4673
- 0,
4674
- 1152
4675
  ],
4676
  "shape": [
4677
- 768,
4678
- 1152
4679
  ],
4680
  "filename_index": 1
4681
  }
4682
  ]
4683
  },
4684
- "h.9.attn.bias": {
4685
  "type": "Distributed",
4686
  "shape": [
4687
- 1,
4688
- 1,
4689
- 1024,
4690
- 1024
4691
  ],
4692
  "dtype": "F32",
4693
  "chunks": [
4694
  {
4695
  "offsets": [
4696
- 0,
4697
- 0,
4698
- 0,
4699
  0
4700
  ],
4701
  "shape": [
4702
- 1,
4703
- 1,
4704
- 1024,
4705
- 512
4706
  ],
4707
  "filename_index": 0
4708
  },
4709
  {
4710
  "offsets": [
4711
- 0,
4712
- 0,
4713
- 0,
4714
- 512
4715
  ],
4716
  "shape": [
4717
- 1,
4718
- 1,
4719
- 1024,
4720
- 512
4721
  ],
4722
  "filename_index": 1
4723
  }
4724
  ]
4725
  },
4726
- "h.8.attn.c_proj.bias": {
4727
  "type": "Distributed",
4728
  "shape": [
4729
- 768
4730
  ],
4731
  "dtype": "F32",
4732
  "chunks": [
@@ -4735,16 +4735,16 @@
4735
  0
4736
  ],
4737
  "shape": [
4738
- 384
4739
  ],
4740
  "filename_index": 0
4741
  },
4742
  {
4743
  "offsets": [
4744
- 384
4745
  ],
4746
  "shape": [
4747
- 384
4748
  ],
4749
  "filename_index": 1
4750
  }
 
1
  {
2
  "tensors": {
3
+ "h.2.ln_2.weight": {
4
  "type": "Distributed",
5
  "shape": [
 
6
  768
7
  ],
8
  "dtype": "F32",
9
  "chunks": [
10
  {
11
  "offsets": [
 
12
  0
13
  ],
14
  "shape": [
15
+ 384
 
16
  ],
17
  "filename_index": 0
18
  },
19
  {
20
  "offsets": [
21
+ 384
 
22
  ],
23
  "shape": [
24
+ 384
 
25
  ],
26
  "filename_index": 1
27
  }
28
  ]
29
  },
30
+ "h.5.mlp.c_proj.bias": {
31
  "type": "Distributed",
32
  "shape": [
33
  768
 
54
  }
55
  ]
56
  },
57
+ "h.9.mlp.c_fc.weight": {
58
  "type": "Distributed",
59
  "shape": [
60
  768,
61
+ 3072
62
  ],
63
  "dtype": "F32",
64
  "chunks": [
 
69
  ],
70
  "shape": [
71
  768,
72
+ 1536
73
  ],
74
  "filename_index": 0
75
  },
76
  {
77
  "offsets": [
78
  0,
79
+ 1536
80
  ],
81
  "shape": [
82
  768,
83
+ 1536
84
  ],
85
  "filename_index": 1
86
  }
87
  ]
88
  },
89
+ "h.5.attn.bias": {
90
  "type": "Distributed",
91
  "shape": [
92
+ 1,
93
+ 1,
94
+ 1024,
95
+ 1024
96
  ],
97
  "dtype": "F32",
98
  "chunks": [
99
  {
100
  "offsets": [
101
+ 0,
102
+ 0,
103
+ 0,
104
  0
105
  ],
106
  "shape": [
107
+ 1,
108
+ 1,
109
+ 1024,
110
+ 512
111
  ],
112
  "filename_index": 0
113
  },
114
  {
115
  "offsets": [
116
+ 0,
117
+ 0,
118
+ 0,
119
+ 512
120
  ],
121
  "shape": [
122
+ 1,
123
+ 1,
124
+ 1024,
125
+ 512
126
  ],
127
  "filename_index": 1
128
  }
129
  ]
130
  },
131
+ "h.9.mlp.c_fc.bias": {
132
  "type": "Distributed",
133
  "shape": [
134
+ 3072
135
  ],
136
  "dtype": "F32",
137
  "chunks": [
 
140
  0
141
  ],
142
  "shape": [
143
+ 1536
144
  ],
145
  "filename_index": 0
146
  },
147
  {
148
  "offsets": [
149
+ 1536
150
  ],
151
  "shape": [
152
+ 1536
153
  ],
154
  "filename_index": 1
155
  }
156
  ]
157
  },
158
+ "h.11.mlp.c_fc.weight": {
159
  "type": "Distributed",
160
  "shape": [
161
+ 768,
162
  3072
163
  ],
164
  "dtype": "F32",
165
  "chunks": [
166
  {
167
  "offsets": [
168
+ 0,
169
  0
170
  ],
171
  "shape": [
172
+ 768,
173
  1536
174
  ],
175
  "filename_index": 0
176
  },
177
  {
178
  "offsets": [
179
+ 0,
180
  1536
181
  ],
182
  "shape": [
183
+ 768,
184
  1536
185
  ],
186
  "filename_index": 1
187
  }
188
  ]
189
  },
190
+ "h.11.ln_2.weight": {
191
  "type": "Distributed",
192
  "shape": [
193
  768
 
214
  }
215
  ]
216
  },
217
+ "h.4.ln_1.weight": {
218
  "type": "Distributed",
219
  "shape": [
220
  768
 
241
  }
242
  ]
243
  },
244
+ "h.8.attn.c_proj.bias": {
245
  "type": "Distributed",
246
  "shape": [
247
  768
 
268
  }
269
  ]
270
  },
271
+ "h.8.ln_1.weight": {
272
  "type": "Distributed",
273
  "shape": [
274
+ 768
 
 
 
275
  ],
276
  "dtype": "F32",
277
  "chunks": [
278
  {
279
  "offsets": [
 
 
 
280
  0
281
  ],
282
  "shape": [
283
+ 384
 
 
 
284
  ],
285
  "filename_index": 0
286
  },
287
  {
288
  "offsets": [
289
+ 384
 
 
 
290
  ],
291
  "shape": [
292
+ 384
 
 
 
293
  ],
294
  "filename_index": 1
295
  }
296
  ]
297
  },
298
+ "h.10.attn.c_attn.weight": {
299
  "type": "Distributed",
300
  "shape": [
301
  768,
302
+ 2304
303
  ],
304
  "dtype": "F32",
305
  "chunks": [
 
310
  ],
311
  "shape": [
312
  768,
313
+ 1152
314
  ],
315
  "filename_index": 0
316
  },
317
  {
318
  "offsets": [
319
  0,
320
+ 1152
321
  ],
322
  "shape": [
323
  768,
324
+ 1152
325
  ],
326
  "filename_index": 1
327
  }
328
  ]
329
  },
330
+ "ln_f.bias": {
331
  "type": "Distributed",
332
  "shape": [
333
  768
 
354
  }
355
  ]
356
  },
357
+ "h.3.ln_1.weight": {
358
  "type": "Distributed",
359
  "shape": [
360
  768
 
381
  }
382
  ]
383
  },
384
+ "h.0.mlp.c_fc.weight": {
385
  "type": "Distributed",
386
  "shape": [
387
+ 768,
388
+ 3072
389
  ],
390
  "dtype": "F32",
391
  "chunks": [
392
  {
393
  "offsets": [
394
+ 0,
395
  0
396
  ],
397
  "shape": [
398
+ 768,
399
+ 1536
400
  ],
401
  "filename_index": 0
402
  },
403
  {
404
  "offsets": [
405
+ 0,
406
+ 1536
407
  ],
408
  "shape": [
409
+ 768,
410
+ 1536
411
  ],
412
  "filename_index": 1
413
  }
414
  ]
415
  },
416
+ "h.10.attn.c_proj.weight": {
417
  "type": "Distributed",
418
  "shape": [
419
+ 768,
420
  768
421
  ],
422
  "dtype": "F32",
423
  "chunks": [
424
  {
425
  "offsets": [
426
+ 0,
427
  0
428
  ],
429
  "shape": [
430
+ 384,
431
+ 768
432
  ],
433
  "filename_index": 0
434
  },
435
  {
436
  "offsets": [
437
+ 384,
438
+ 0
439
  ],
440
  "shape": [
441
+ 384,
442
+ 768
443
  ],
444
  "filename_index": 1
445
  }
446
  ]
447
  },
448
+ "h.1.attn.c_proj.weight": {
449
  "type": "Distributed",
450
  "shape": [
451
  768,
452
+ 768
453
  ],
454
  "dtype": "F32",
455
  "chunks": [
 
459
  0
460
  ],
461
  "shape": [
462
+ 384,
463
+ 768
464
  ],
465
  "filename_index": 0
466
  },
467
  {
468
  "offsets": [
469
+ 384,
470
+ 0
471
  ],
472
  "shape": [
473
+ 384,
474
+ 768
475
  ],
476
  "filename_index": 1
477
  }
478
  ]
479
  },
480
+ "h.3.ln_2.bias": {
481
  "type": "Distributed",
482
  "shape": [
483
+ 768
 
484
  ],
485
  "dtype": "F32",
486
  "chunks": [
487
  {
488
  "offsets": [
 
489
  0
490
  ],
491
  "shape": [
492
+ 384
 
493
  ],
494
  "filename_index": 0
495
  },
496
  {
497
  "offsets": [
498
+ 384
 
499
  ],
500
  "shape": [
501
+ 384
 
502
  ],
503
  "filename_index": 1
504
  }
505
  ]
506
  },
507
+ "h.2.attn.c_proj.bias": {
508
  "type": "Distributed",
509
  "shape": [
510
+ 768
511
  ],
512
  "dtype": "F32",
513
  "chunks": [
 
516
  0
517
  ],
518
  "shape": [
519
+ 384
520
  ],
521
  "filename_index": 0
522
  },
523
  {
524
  "offsets": [
525
+ 384
526
  ],
527
  "shape": [
528
+ 384
529
  ],
530
  "filename_index": 1
531
  }
532
  ]
533
  },
534
+ "h.6.ln_2.weight": {
535
  "type": "Distributed",
536
  "shape": [
537
+ 768
538
  ],
539
  "dtype": "F32",
540
  "chunks": [
 
543
  0
544
  ],
545
  "shape": [
546
+ 384
547
  ],
548
  "filename_index": 0
549
  },
550
  {
551
  "offsets": [
552
+ 384
553
  ],
554
  "shape": [
555
+ 384
556
  ],
557
  "filename_index": 1
558
  }
559
  ]
560
  },
561
+ "h.3.attn.c_attn.weight": {
562
  "type": "Distributed",
563
  "shape": [
564
+ 768,
565
+ 2304
566
  ],
567
  "dtype": "F32",
568
  "chunks": [
569
  {
570
  "offsets": [
571
+ 0,
572
  0
573
  ],
574
  "shape": [
575
+ 768,
576
+ 1152
577
  ],
578
  "filename_index": 0
579
  },
580
  {
581
  "offsets": [
582
+ 0,
583
+ 1152
584
  ],
585
  "shape": [
586
+ 768,
587
+ 1152
588
  ],
589
  "filename_index": 1
590
  }
591
  ]
592
  },
593
+ "h.8.mlp.c_proj.weight": {
594
  "type": "Distributed",
595
  "shape": [
596
+ 3072,
597
  768
598
  ],
599
  "dtype": "F32",
600
  "chunks": [
601
  {
602
  "offsets": [
603
+ 0,
604
  0
605
  ],
606
  "shape": [
607
+ 1536,
608
+ 768
609
  ],
610
  "filename_index": 0
611
  },
612
  {
613
  "offsets": [
614
+ 1536,
615
+ 0
616
  ],
617
  "shape": [
618
+ 1536,
619
+ 768
620
  ],
621
  "filename_index": 1
622
  }
623
  ]
624
  },
625
+ "h.11.attn.c_attn.weight": {
626
  "type": "Distributed",
627
  "shape": [
628
+ 768,
629
+ 2304
630
  ],
631
  "dtype": "F32",
632
  "chunks": [
633
  {
634
  "offsets": [
635
+ 0,
636
  0
637
  ],
638
  "shape": [
639
+ 768,
640
+ 1152
641
  ],
642
  "filename_index": 0
643
  },
644
  {
645
  "offsets": [
646
+ 0,
647
+ 1152
648
  ],
649
  "shape": [
650
+ 768,
651
+ 1152
652
  ],
653
  "filename_index": 1
654
  }
655
  ]
656
  },
657
+ "h.6.attn.c_proj.weight": {
658
  "type": "Distributed",
659
  "shape": [
660
+ 768,
661
+ 768
662
  ],
663
  "dtype": "F32",
664
  "chunks": [
665
  {
666
  "offsets": [
667
+ 0,
668
  0
669
  ],
670
  "shape": [
671
+ 384,
672
+ 768
673
  ],
674
  "filename_index": 0
675
  },
676
  {
677
  "offsets": [
678
+ 384,
679
+ 0
680
  ],
681
  "shape": [
682
+ 384,
683
+ 768
684
  ],
685
  "filename_index": 1
686
  }
687
  ]
688
  },
689
+ "h.10.attn.bias": {
690
  "type": "Distributed",
691
  "shape": [
692
  1,
 
728
  }
729
  ]
730
  },
731
+ "h.1.mlp.c_fc.weight": {
732
  "type": "Distributed",
733
  "shape": [
734
+ 768,
735
  3072
736
  ],
737
  "dtype": "F32",
738
  "chunks": [
739
  {
740
  "offsets": [
741
+ 0,
742
  0
743
  ],
744
  "shape": [
745
+ 768,
746
  1536
747
  ],
748
  "filename_index": 0
749
  },
750
  {
751
  "offsets": [
752
+ 0,
753
  1536
754
  ],
755
  "shape": [
756
+ 768,
757
  1536
758
  ],
759
  "filename_index": 1
760
  }
761
  ]
762
  },
763
+ "h.3.mlp.c_fc.bias": {
764
  "type": "Distributed",
765
  "shape": [
766
+ 3072
767
  ],
768
  "dtype": "F32",
769
  "chunks": [
 
772
  0
773
  ],
774
  "shape": [
775
+ 1536
776
  ],
777
  "filename_index": 0
778
  },
779
  {
780
  "offsets": [
781
+ 1536
782
  ],
783
  "shape": [
784
+ 1536
785
  ],
786
  "filename_index": 1
787
  }
788
  ]
789
  },
790
+ "h.8.attn.c_attn.weight": {
791
  "type": "Distributed",
792
  "shape": [
793
+ 768,
794
+ 2304
795
  ],
796
  "dtype": "F32",
797
  "chunks": [
 
801
  0
802
  ],
803
  "shape": [
804
+ 768,
805
+ 1152
806
  ],
807
  "filename_index": 0
808
  },
809
  {
810
  "offsets": [
811
+ 0,
812
+ 1152
813
  ],
814
  "shape": [
815
+ 768,
816
+ 1152
817
  ],
818
  "filename_index": 1
819
  }
820
  ]
821
  },
822
+ "h.10.mlp.c_fc.weight": {
823
  "type": "Distributed",
824
  "shape": [
825
+ 768,
826
+ 3072
827
  ],
828
  "dtype": "F32",
829
  "chunks": [
830
  {
831
  "offsets": [
832
+ 0,
833
  0
834
  ],
835
  "shape": [
836
+ 768,
837
+ 1536
838
  ],
839
  "filename_index": 0
840
  },
841
  {
842
  "offsets": [
843
+ 0,
844
+ 1536
845
  ],
846
  "shape": [
847
+ 768,
848
+ 1536
849
  ],
850
  "filename_index": 1
851
  }
852
  ]
853
  },
854
+ "h.1.mlp.c_fc.bias": {
855
  "type": "Distributed",
856
  "shape": [
857
+ 3072
 
858
  ],
859
  "dtype": "F32",
860
  "chunks": [
861
  {
862
  "offsets": [
 
863
  0
864
  ],
865
  "shape": [
866
+ 1536
 
867
  ],
868
  "filename_index": 0
869
  },
870
  {
871
  "offsets": [
872
+ 1536
 
873
  ],
874
  "shape": [
875
+ 1536
 
876
  ],
877
  "filename_index": 1
878
  }
879
  ]
880
  },
881
+ "h.3.attn.c_proj.weight": {
882
  "type": "Distributed",
883
  "shape": [
884
+ 768,
885
+ 768
886
  ],
887
  "dtype": "F32",
888
  "chunks": [
889
  {
890
  "offsets": [
891
+ 0,
892
  0
893
  ],
894
  "shape": [
895
+ 384,
896
+ 768
897
  ],
898
  "filename_index": 0
899
  },
900
  {
901
  "offsets": [
902
+ 384,
903
+ 0
904
  ],
905
  "shape": [
906
+ 384,
907
+ 768
908
  ],
909
  "filename_index": 1
910
  }
911
  ]
912
  },
913
+ "ln_f.weight": {
914
  "type": "Distributed",
915
  "shape": [
916
+ 768
 
917
  ],
918
  "dtype": "F32",
919
  "chunks": [
920
  {
921
  "offsets": [
 
922
  0
923
  ],
924
  "shape": [
925
+ 384
 
926
  ],
927
  "filename_index": 0
928
  },
929
  {
930
  "offsets": [
931
+ 384
 
932
  ],
933
  "shape": [
934
+ 384
 
935
  ],
936
  "filename_index": 1
937
  }
938
  ]
939
  },
940
+ "h.7.attn.c_attn.bias": {
941
  "type": "Distributed",
942
  "shape": [
943
+ 2304
944
  ],
945
  "dtype": "F32",
946
  "chunks": [
 
949
  0
950
  ],
951
  "shape": [
952
+ 1152
953
  ],
954
  "filename_index": 0
955
  },
956
  {
957
  "offsets": [
958
+ 1152
959
  ],
960
  "shape": [
961
+ 1152
962
  ],
963
  "filename_index": 1
964
  }
965
  ]
966
  },
967
+ "h.11.attn.c_attn.bias": {
968
  "type": "Distributed",
969
  "shape": [
970
+ 2304
971
  ],
972
  "dtype": "F32",
973
  "chunks": [
 
976
  0
977
  ],
978
  "shape": [
979
+ 1152
980
  ],
981
  "filename_index": 0
982
  },
983
  {
984
  "offsets": [
985
+ 1152
986
  ],
987
  "shape": [
988
+ 1152
989
  ],
990
  "filename_index": 1
991
  }
992
  ]
993
  },
994
+ "h.11.ln_1.weight": {
995
  "type": "Distributed",
996
  "shape": [
997
  768
 
1018
  }
1019
  ]
1020
  },
1021
+ "h.5.attn.c_proj.weight": {
1022
  "type": "Distributed",
1023
  "shape": [
1024
+ 768,
1025
  768
1026
  ],
1027
  "dtype": "F32",
1028
  "chunks": [
1029
  {
1030
  "offsets": [
1031
+ 0,
1032
  0
1033
  ],
1034
  "shape": [
1035
+ 384,
1036
+ 768
1037
  ],
1038
  "filename_index": 0
1039
  },
1040
  {
1041
  "offsets": [
1042
+ 384,
1043
+ 0
1044
  ],
1045
  "shape": [
1046
+ 384,
1047
+ 768
1048
  ],
1049
  "filename_index": 1
1050
  }
1051
  ]
1052
  },
1053
+ "h.9.mlp.c_proj.weight": {
1054
  "type": "Distributed",
1055
  "shape": [
1056
+ 3072,
1057
  768
1058
  ],
1059
  "dtype": "F32",
1060
  "chunks": [
1061
  {
1062
  "offsets": [
1063
+ 0,
1064
  0
1065
  ],
1066
  "shape": [
1067
+ 1536,
1068
+ 768
1069
  ],
1070
  "filename_index": 0
1071
  },
1072
  {
1073
  "offsets": [
1074
+ 1536,
1075
+ 0
1076
  ],
1077
  "shape": [
1078
+ 1536,
1079
+ 768
1080
  ],
1081
  "filename_index": 1
1082
  }
1083
  ]
1084
  },
1085
+ "h.4.mlp.c_fc.bias": {
1086
  "type": "Distributed",
1087
  "shape": [
1088
+ 3072
 
1089
  ],
1090
  "dtype": "F32",
1091
  "chunks": [
1092
  {
1093
  "offsets": [
 
1094
  0
1095
  ],
1096
  "shape": [
1097
+ 1536
 
1098
  ],
1099
  "filename_index": 0
1100
  },
1101
  {
1102
  "offsets": [
1103
+ 1536
 
1104
  ],
1105
  "shape": [
1106
+ 1536
 
1107
  ],
1108
  "filename_index": 1
1109
  }
1110
  ]
1111
  },
1112
+ "h.3.ln_1.bias": {
1113
  "type": "Distributed",
1114
  "shape": [
1115
  768
 
1136
  }
1137
  ]
1138
  },
1139
+ "h.7.ln_1.weight": {
1140
  "type": "Distributed",
1141
  "shape": [
1142
  768
 
1163
  }
1164
  ]
1165
  },
1166
+ "h.8.attn.bias": {
1167
  "type": "Distributed",
1168
  "shape": [
1169
+ 1,
1170
+ 1,
1171
+ 1024,
1172
+ 1024
1173
  ],
1174
  "dtype": "F32",
1175
  "chunks": [
1176
  {
1177
  "offsets": [
1178
+ 0,
1179
+ 0,
1180
+ 0,
1181
  0
1182
  ],
1183
  "shape": [
1184
+ 1,
1185
+ 1,
1186
+ 1024,
1187
+ 512
1188
  ],
1189
  "filename_index": 0
1190
  },
1191
  {
1192
  "offsets": [
1193
+ 0,
1194
+ 0,
1195
+ 0,
1196
+ 512
1197
  ],
1198
  "shape": [
1199
+ 1,
1200
+ 1,
1201
+ 1024,
1202
+ 512
1203
  ],
1204
  "filename_index": 1
1205
  }
1206
  ]
1207
  },
1208
+ "h.11.attn.bias": {
1209
  "type": "Distributed",
1210
  "shape": [
1211
+ 1,
1212
+ 1,
1213
+ 1024,
1214
+ 1024
1215
  ],
1216
  "dtype": "F32",
1217
  "chunks": [
1218
  {
1219
  "offsets": [
1220
+ 0,
1221
+ 0,
1222
  0,
1223
  0
1224
  ],
1225
  "shape": [
1226
+ 1,
1227
+ 1,
1228
+ 1024,
1229
+ 512
1230
  ],
1231
  "filename_index": 0
1232
  },
1233
  {
1234
  "offsets": [
1235
  0,
1236
+ 0,
1237
+ 0,
1238
+ 512
1239
  ],
1240
  "shape": [
1241
+ 1,
1242
+ 1,
1243
+ 1024,
1244
+ 512
1245
  ],
1246
  "filename_index": 1
1247
  }
1248
  ]
1249
  },
1250
+ "h.5.ln_1.bias": {
1251
  "type": "Distributed",
1252
  "shape": [
1253
  768
 
1274
  }
1275
  ]
1276
  },
1277
+ "h.7.mlp.c_proj.weight": {
1278
  "type": "Distributed",
1279
  "shape": [
1280
+ 3072,
1281
  768
1282
  ],
1283
  "dtype": "F32",
1284
  "chunks": [
1285
  {
1286
  "offsets": [
1287
+ 0,
1288
  0
1289
  ],
1290
  "shape": [
1291
+ 1536,
1292
+ 768
1293
  ],
1294
  "filename_index": 0
1295
  },
1296
  {
1297
  "offsets": [
1298
+ 1536,
1299
+ 0
1300
  ],
1301
  "shape": [
1302
+ 1536,
1303
+ 768
1304
  ],
1305
  "filename_index": 1
1306
  }
1307
  ]
1308
  },
1309
+ "h.10.mlp.c_proj.bias": {
1310
  "type": "Distributed",
1311
  "shape": [
1312
  768
 
1333
  }
1334
  ]
1335
  },
1336
+ "h.3.mlp.c_proj.weight": {
1337
  "type": "Distributed",
1338
  "shape": [
1339
+ 3072,
1340
  768
1341
  ],
1342
  "dtype": "F32",
1343
  "chunks": [
1344
  {
1345
  "offsets": [
1346
+ 0,
1347
  0
1348
  ],
1349
  "shape": [
1350
+ 1536,
1351
+ 768
1352
  ],
1353
  "filename_index": 0
1354
  },
1355
  {
1356
  "offsets": [
1357
+ 1536,
1358
+ 0
1359
  ],
1360
  "shape": [
1361
+ 1536,
1362
+ 768
1363
  ],
1364
  "filename_index": 1
1365
  }
1366
  ]
1367
  },
1368
+ "h.6.attn.c_attn.weight": {
1369
  "type": "Distributed",
1370
  "shape": [
1371
+ 768,
1372
+ 2304
1373
  ],
1374
  "dtype": "F32",
1375
  "chunks": [
 
1379
  0
1380
  ],
1381
  "shape": [
1382
+ 768,
1383
+ 1152
1384
  ],
1385
  "filename_index": 0
1386
  },
1387
  {
1388
  "offsets": [
1389
  0,
1390
+ 1152
1391
  ],
1392
  "shape": [
1393
+ 768,
1394
+ 1152
1395
  ],
1396
  "filename_index": 1
1397
  }
1398
  ]
1399
  },
1400
+ "h.8.ln_2.weight": {
1401
  "type": "Distributed",
1402
  "shape": [
1403
  768
 
1424
  }
1425
  ]
1426
  },
1427
+ "h.7.attn.c_attn.weight": {
1428
  "type": "Distributed",
1429
  "shape": [
1430
  768,
1431
+ 2304
1432
  ],
1433
  "dtype": "F32",
1434
  "chunks": [
 
1438
  0
1439
  ],
1440
  "shape": [
1441
+ 768,
1442
+ 1152
1443
  ],
1444
  "filename_index": 0
1445
  },
1446
  {
1447
  "offsets": [
1448
+ 0,
1449
+ 1152
1450
  ],
1451
  "shape": [
1452
+ 768,
1453
+ 1152
1454
  ],
1455
  "filename_index": 1
1456
  }
1457
  ]
1458
  },
1459
+ "h.8.attn.c_proj.weight": {
1460
  "type": "Distributed",
1461
  "shape": [
1462
+ 768,
1463
  768
1464
  ],
1465
  "dtype": "F32",
1466
  "chunks": [
1467
  {
1468
  "offsets": [
1469
+ 0,
1470
  0
1471
  ],
1472
  "shape": [
1473
+ 384,
1474
+ 768
1475
  ],
1476
  "filename_index": 0
1477
  },
1478
  {
1479
  "offsets": [
1480
+ 384,
1481
+ 0
1482
  ],
1483
  "shape": [
1484
+ 384,
1485
+ 768
1486
  ],
1487
  "filename_index": 1
1488
  }
1489
  ]
1490
  },
1491
+ "h.10.attn.c_proj.bias": {
1492
  "type": "Distributed",
1493
  "shape": [
1494
  768
 
1515
  }
1516
  ]
1517
  },
1518
+ "h.7.ln_2.weight": {
1519
  "type": "Distributed",
1520
  "shape": [
1521
  768
 
1542
  }
1543
  ]
1544
  },
1545
+ "h.8.mlp.c_fc.bias": {
1546
  "type": "Distributed",
1547
  "shape": [
1548
+ 3072
 
1549
  ],
1550
  "dtype": "F32",
1551
  "chunks": [
1552
  {
1553
  "offsets": [
 
1554
  0
1555
  ],
1556
  "shape": [
1557
+ 1536
 
1558
  ],
1559
  "filename_index": 0
1560
  },
1561
  {
1562
  "offsets": [
1563
+ 1536
 
1564
  ],
1565
  "shape": [
1566
+ 1536
 
1567
  ],
1568
  "filename_index": 1
1569
  }
1570
  ]
1571
  },
1572
+ "h.2.mlp.c_fc.weight": {
1573
  "type": "Distributed",
1574
  "shape": [
1575
  768,
1576
+ 3072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
  ],
1578
  "dtype": "F32",
1579
  "chunks": [
1580
  {
1581
  "offsets": [
 
 
1582
  0,
1583
  0
1584
  ],
1585
  "shape": [
1586
+ 768,
1587
+ 1536
 
 
1588
  ],
1589
  "filename_index": 0
1590
  },
1591
  {
1592
  "offsets": [
1593
  0,
1594
+ 1536
 
 
1595
  ],
1596
  "shape": [
1597
+ 768,
1598
+ 1536
 
 
1599
  ],
1600
  "filename_index": 1
1601
  }
1602
  ]
1603
  },
1604
+ "h.4.ln_2.weight": {
1605
  "type": "Distributed",
1606
  "shape": [
 
1607
  768
1608
  ],
1609
  "dtype": "F32",
1610
  "chunks": [
1611
  {
1612
  "offsets": [
 
1613
  0
1614
  ],
1615
  "shape": [
1616
+ 384
 
1617
  ],
1618
  "filename_index": 0
1619
  },
1620
  {
1621
  "offsets": [
1622
+ 384
 
1623
  ],
1624
  "shape": [
1625
+ 384
 
1626
  ],
1627
  "filename_index": 1
1628
  }
1629
  ]
1630
  },
1631
+ "h.9.attn.c_attn.bias": {
1632
  "type": "Distributed",
1633
  "shape": [
1634
+ 2304
1635
  ],
1636
  "dtype": "F32",
1637
  "chunks": [
 
1640
  0
1641
  ],
1642
  "shape": [
1643
+ 1152
1644
  ],
1645
  "filename_index": 0
1646
  },
1647
  {
1648
  "offsets": [
1649
+ 1152
1650
  ],
1651
  "shape": [
1652
+ 1152
1653
  ],
1654
  "filename_index": 1
1655
  }
1656
  ]
1657
  },
1658
+ "h.4.attn.c_proj.weight": {
1659
  "type": "Distributed",
1660
  "shape": [
1661
  768,
 
1687
  }
1688
  ]
1689
  },
1690
+ "h.3.attn.c_attn.bias": {
1691
  "type": "Distributed",
1692
  "shape": [
1693
+ 2304
1694
  ],
1695
  "dtype": "F32",
1696
  "chunks": [
 
1699
  0
1700
  ],
1701
  "shape": [
1702
+ 1152
1703
  ],
1704
  "filename_index": 0
1705
  },
1706
  {
1707
  "offsets": [
1708
+ 1152
1709
  ],
1710
  "shape": [
1711
+ 1152
1712
  ],
1713
  "filename_index": 1
1714
  }
1715
  ]
1716
  },
1717
+ "h.10.mlp.c_proj.weight": {
1718
  "type": "Distributed",
1719
  "shape": [
1720
+ 3072,
1721
  768
1722
  ],
1723
  "dtype": "F32",
1724
  "chunks": [
1725
  {
1726
  "offsets": [
1727
+ 0,
1728
  0
1729
  ],
1730
  "shape": [
1731
+ 1536,
1732
+ 768
1733
  ],
1734
  "filename_index": 0
1735
  },
1736
  {
1737
  "offsets": [
1738
+ 1536,
1739
+ 0
1740
  ],
1741
  "shape": [
1742
+ 1536,
1743
+ 768
1744
  ],
1745
  "filename_index": 1
1746
  }
1747
  ]
1748
  },
1749
+ "wte.weight": {
1750
  "type": "Distributed",
1751
  "shape": [
1752
+ 50257,
1753
  768
1754
  ],
1755
  "dtype": "F32",
1756
  "chunks": [
1757
  {
1758
  "offsets": [
1759
+ 0,
1760
  0
1761
  ],
1762
  "shape": [
1763
+ 50257,
1764
  384
1765
  ],
1766
  "filename_index": 0
1767
  },
1768
  {
1769
  "offsets": [
1770
+ 0,
1771
  384
1772
  ],
1773
  "shape": [
1774
+ 50257,
1775
  384
1776
  ],
1777
  "filename_index": 1
1778
  }
1779
  ]
1780
  },
1781
+ "h.10.ln_2.bias": {
1782
  "type": "Distributed",
1783
  "shape": [
1784
  768
 
1805
  }
1806
  ]
1807
  },
1808
+ "h.9.attn.c_attn.weight": {
1809
  "type": "Distributed",
1810
  "shape": [
1811
+ 768,
1812
+ 2304
1813
  ],
1814
  "dtype": "F32",
1815
  "chunks": [
1816
  {
1817
  "offsets": [
1818
+ 0,
1819
  0
1820
  ],
1821
  "shape": [
1822
+ 768,
1823
+ 1152
1824
  ],
1825
  "filename_index": 0
1826
  },
1827
  {
1828
  "offsets": [
1829
+ 0,
1830
+ 1152
1831
  ],
1832
  "shape": [
1833
+ 768,
1834
+ 1152
1835
  ],
1836
  "filename_index": 1
1837
  }
1838
  ]
1839
  },
1840
+ "h.2.mlp.c_proj.bias": {
1841
  "type": "Distributed",
1842
  "shape": [
1843
  768
 
1864
  }
1865
  ]
1866
  },
1867
+ "h.1.attn.c_proj.bias": {
1868
  "type": "Distributed",
1869
  "shape": [
1870
  768
 
1891
  }
1892
  ]
1893
  },
1894
+ "h.9.ln_2.weight": {
1895
  "type": "Distributed",
1896
  "shape": [
1897
+ 768
 
1898
  ],
1899
  "dtype": "F32",
1900
  "chunks": [
1901
  {
1902
  "offsets": [
 
1903
  0
1904
  ],
1905
  "shape": [
1906
+ 384
 
1907
  ],
1908
  "filename_index": 0
1909
  },
1910
  {
1911
  "offsets": [
1912
+ 384
 
1913
  ],
1914
  "shape": [
1915
+ 384
 
1916
  ],
1917
  "filename_index": 1
1918
  }
1919
  ]
1920
  },
1921
+ "h.2.mlp.c_proj.weight": {
1922
  "type": "Distributed",
1923
  "shape": [
1924
+ 3072,
1925
  768
1926
  ],
1927
  "dtype": "F32",
1928
  "chunks": [
1929
  {
1930
  "offsets": [
1931
+ 0,
1932
  0
1933
  ],
1934
  "shape": [
1935
+ 1536,
1936
+ 768
1937
  ],
1938
  "filename_index": 0
1939
  },
1940
  {
1941
  "offsets": [
1942
+ 1536,
1943
+ 0
1944
  ],
1945
  "shape": [
1946
+ 1536,
1947
+ 768
1948
  ],
1949
  "filename_index": 1
1950
  }
1951
  ]
1952
  },
1953
+ "h.0.attn.bias": {
1954
  "type": "Distributed",
1955
  "shape": [
1956
+ 1,
1957
+ 1,
1958
+ 1024,
1959
+ 1024
1960
  ],
1961
  "dtype": "F32",
1962
  "chunks": [
1963
  {
1964
  "offsets": [
1965
+ 0,
1966
+ 0,
1967
  0,
1968
  0
1969
  ],
1970
  "shape": [
1971
+ 1,
1972
+ 1,
1973
+ 1024,
1974
+ 512
1975
  ],
1976
  "filename_index": 0
1977
  },
1978
  {
1979
  "offsets": [
1980
+ 0,
1981
+ 0,
1982
+ 0,
1983
+ 512
1984
  ],
1985
  "shape": [
1986
+ 1,
1987
+ 1,
1988
+ 1024,
1989
+ 512
1990
  ],
1991
  "filename_index": 1
1992
  }
1993
  ]
1994
  },
1995
+ "h.5.attn.c_attn.weight": {
1996
  "type": "Distributed",
1997
  "shape": [
1998
  768,
1999
+ 2304
2000
  ],
2001
  "dtype": "F32",
2002
  "chunks": [
 
2007
  ],
2008
  "shape": [
2009
  768,
2010
+ 1152
2011
  ],
2012
  "filename_index": 0
2013
  },
2014
  {
2015
  "offsets": [
2016
  0,
2017
+ 1152
2018
  ],
2019
  "shape": [
2020
  768,
2021
+ 1152
2022
  ],
2023
  "filename_index": 1
2024
  }
2025
  ]
2026
  },
2027
+ "h.3.attn.c_proj.bias": {
2028
  "type": "Distributed",
2029
  "shape": [
2030
+ 768
2031
  ],
2032
  "dtype": "F32",
2033
  "chunks": [
 
2036
  0
2037
  ],
2038
  "shape": [
2039
+ 384
2040
  ],
2041
  "filename_index": 0
2042
  },
2043
  {
2044
  "offsets": [
2045
+ 384
2046
  ],
2047
  "shape": [
2048
+ 384
2049
  ],
2050
  "filename_index": 1
2051
  }
2052
  ]
2053
  },
2054
+ "h.7.attn.bias": {
2055
  "type": "Distributed",
2056
  "shape": [
2057
+ 1,
2058
+ 1,
2059
+ 1024,
2060
+ 1024
2061
  ],
2062
  "dtype": "F32",
2063
  "chunks": [
2064
  {
2065
  "offsets": [
2066
+ 0,
2067
+ 0,
2068
+ 0,
2069
  0
2070
  ],
2071
  "shape": [
2072
+ 1,
2073
+ 1,
2074
+ 1024,
2075
+ 512
2076
  ],
2077
  "filename_index": 0
2078
  },
2079
  {
2080
  "offsets": [
2081
+ 0,
2082
+ 0,
2083
+ 0,
2084
+ 512
2085
  ],
2086
  "shape": [
2087
+ 1,
2088
+ 1,
2089
+ 1024,
2090
+ 512
2091
  ],
2092
  "filename_index": 1
2093
  }
2094
  ]
2095
  },
2096
+ "h.3.mlp.c_fc.weight": {
2097
  "type": "Distributed",
2098
  "shape": [
2099
  768,
 
2125
  }
2126
  ]
2127
  },
2128
+ "h.8.mlp.c_fc.weight": {
2129
  "type": "Distributed",
2130
  "shape": [
2131
+ 768,
2132
+ 3072
2133
  ],
2134
  "dtype": "F32",
2135
  "chunks": [
 
2139
  0
2140
  ],
2141
  "shape": [
2142
+ 768,
2143
+ 1536
2144
  ],
2145
  "filename_index": 0
2146
  },
2147
  {
2148
  "offsets": [
2149
+ 0,
2150
+ 1536
2151
  ],
2152
  "shape": [
2153
+ 768,
2154
+ 1536
2155
  ],
2156
  "filename_index": 1
2157
  }
2158
  ]
2159
  },
2160
+ "h.0.attn.c_proj.bias": {
2161
  "type": "Distributed",
2162
  "shape": [
 
2163
  768
2164
  ],
2165
  "dtype": "F32",
2166
  "chunks": [
2167
  {
2168
  "offsets": [
 
2169
  0
2170
  ],
2171
  "shape": [
2172
+ 384
 
2173
  ],
2174
  "filename_index": 0
2175
  },
2176
  {
2177
  "offsets": [
2178
+ 384
2179
+ ],
2180
+ "shape": [
2181
+ 384
2182
+ ],
2183
+ "filename_index": 1
2184
+ }
2185
+ ]
2186
+ },
2187
+ "h.8.mlp.c_proj.bias": {
2188
+ "type": "Distributed",
2189
+ "shape": [
2190
+ 768
2191
+ ],
2192
+ "dtype": "F32",
2193
+ "chunks": [
2194
+ {
2195
+ "offsets": [
2196
  0
2197
  ],
2198
  "shape": [
2199
+ 384
2200
+ ],
2201
+ "filename_index": 0
2202
+ },
2203
+ {
2204
+ "offsets": [
2205
+ 384
2206
+ ],
2207
+ "shape": [
2208
+ 384
2209
  ],
2210
  "filename_index": 1
2211
  }
2212
  ]
2213
  },
2214
+ "h.0.ln_1.weight": {
2215
  "type": "Distributed",
2216
  "shape": [
2217
  768
 
2238
  }
2239
  ]
2240
  },
2241
+ "h.3.attn.bias": {
2242
  "type": "Distributed",
2243
  "shape": [
2244
+ 1,
2245
+ 1,
2246
+ 1024,
2247
+ 1024
2248
  ],
2249
  "dtype": "F32",
2250
  "chunks": [
2251
  {
2252
  "offsets": [
2253
+ 0,
2254
+ 0,
2255
  0,
2256
  0
2257
  ],
2258
  "shape": [
2259
+ 1,
2260
+ 1,
2261
+ 1024,
2262
+ 512
2263
  ],
2264
  "filename_index": 0
2265
  },
2266
  {
2267
  "offsets": [
2268
  0,
2269
+ 0,
2270
+ 0,
2271
+ 512
2272
  ],
2273
  "shape": [
2274
+ 1,
2275
+ 1,
2276
+ 1024,
2277
+ 512
2278
  ],
2279
  "filename_index": 1
2280
  }
2281
  ]
2282
  },
2283
+ "h.6.ln_2.bias": {
2284
  "type": "Distributed",
2285
  "shape": [
2286
  768
 
2307
  }
2308
  ]
2309
  },
2310
+ "h.3.ln_2.weight": {
2311
  "type": "Distributed",
2312
  "shape": [
2313
  768
 
2334
  }
2335
  ]
2336
  },
2337
+ "h.4.ln_1.bias": {
2338
  "type": "Distributed",
2339
  "shape": [
 
2340
  768
2341
  ],
2342
  "dtype": "F32",
2343
  "chunks": [
2344
  {
2345
  "offsets": [
 
2346
  0
2347
  ],
2348
  "shape": [
 
2349
  384
2350
  ],
2351
  "filename_index": 0
2352
  },
2353
  {
2354
  "offsets": [
 
2355
  384
2356
  ],
2357
  "shape": [
 
2358
  384
2359
  ],
2360
  "filename_index": 1
2361
  }
2362
  ]
2363
  },
2364
+ "h.8.ln_2.bias": {
2365
  "type": "Distributed",
2366
  "shape": [
2367
+ 768
 
2368
  ],
2369
  "dtype": "F32",
2370
  "chunks": [
2371
  {
2372
  "offsets": [
 
2373
  0
2374
  ],
2375
  "shape": [
2376
+ 384
 
2377
  ],
2378
  "filename_index": 0
2379
  },
2380
  {
2381
  "offsets": [
2382
+ 384
 
2383
  ],
2384
  "shape": [
2385
+ 384
 
2386
  ],
2387
  "filename_index": 1
2388
  }
2389
  ]
2390
  },
2391
+ "h.1.ln_2.bias": {
2392
  "type": "Distributed",
2393
  "shape": [
2394
  768
 
2415
  }
2416
  ]
2417
  },
2418
+ "h.1.mlp.c_proj.bias": {
2419
  "type": "Distributed",
2420
  "shape": [
2421
+ 768
2422
  ],
2423
  "dtype": "F32",
2424
  "chunks": [
 
2427
  0
2428
  ],
2429
  "shape": [
2430
+ 384
2431
  ],
2432
  "filename_index": 0
2433
  },
2434
  {
2435
  "offsets": [
2436
+ 384
2437
  ],
2438
  "shape": [
2439
+ 384
2440
  ],
2441
  "filename_index": 1
2442
  }
2443
  ]
2444
  },
2445
+ "h.6.mlp.c_proj.weight": {
2446
  "type": "Distributed",
2447
  "shape": [
2448
+ 3072,
2449
+ 768
2450
  ],
2451
  "dtype": "F32",
2452
  "chunks": [
2453
  {
2454
  "offsets": [
2455
+ 0,
2456
  0
2457
  ],
2458
  "shape": [
2459
+ 1536,
2460
+ 768
2461
  ],
2462
  "filename_index": 0
2463
  },
2464
  {
2465
  "offsets": [
2466
+ 1536,
2467
+ 0
2468
  ],
2469
  "shape": [
2470
+ 1536,
2471
+ 768
2472
  ],
2473
  "filename_index": 1
2474
  }
2475
  ]
2476
  },
2477
+ "h.7.mlp.c_fc.weight": {
2478
  "type": "Distributed",
2479
  "shape": [
2480
+ 768,
2481
+ 3072
 
 
2482
  ],
2483
  "dtype": "F32",
2484
  "chunks": [
2485
  {
2486
  "offsets": [
 
 
2487
  0,
2488
  0
2489
  ],
2490
  "shape": [
2491
+ 768,
2492
+ 1536
 
 
2493
  ],
2494
  "filename_index": 0
2495
  },
2496
  {
2497
  "offsets": [
2498
  0,
2499
+ 1536
 
 
2500
  ],
2501
  "shape": [
2502
+ 768,
2503
+ 1536
 
 
2504
  ],
2505
  "filename_index": 1
2506
  }
2507
  ]
2508
  },
2509
+ "h.0.attn.c_proj.weight": {
2510
  "type": "Distributed",
2511
  "shape": [
2512
+ 768,
2513
  768
2514
  ],
2515
  "dtype": "F32",
 
2520
  0
2521
  ],
2522
  "shape": [
2523
+ 384,
2524
  768
2525
  ],
2526
  "filename_index": 0
2527
  },
2528
  {
2529
  "offsets": [
2530
+ 384,
2531
  0
2532
  ],
2533
  "shape": [
2534
+ 384,
2535
  768
2536
  ],
2537
  "filename_index": 1
2538
  }
2539
  ]
2540
  },
2541
+ "h.6.ln_1.weight": {
2542
  "type": "Distributed",
2543
  "shape": [
2544
+ 768
 
 
 
2545
  ],
2546
  "dtype": "F32",
2547
  "chunks": [
2548
  {
2549
  "offsets": [
 
 
 
2550
  0
2551
  ],
2552
  "shape": [
2553
+ 384
 
 
 
2554
  ],
2555
  "filename_index": 0
2556
  },
2557
  {
2558
  "offsets": [
2559
+ 384
 
 
 
2560
  ],
2561
  "shape": [
2562
+ 384
 
 
 
2563
  ],
2564
  "filename_index": 1
2565
  }
2566
  ]
2567
  },
2568
+ "h.5.attn.c_attn.bias": {
2569
  "type": "Distributed",
2570
  "shape": [
2571
+ 2304
2572
  ],
2573
  "dtype": "F32",
2574
  "chunks": [
 
2577
  0
2578
  ],
2579
  "shape": [
2580
+ 1152
2581
  ],
2582
  "filename_index": 0
2583
  },
2584
  {
2585
  "offsets": [
2586
+ 1152
2587
  ],
2588
  "shape": [
2589
+ 1152
2590
  ],
2591
  "filename_index": 1
2592
  }
2593
  ]
2594
  },
2595
+ "h.8.ln_1.bias": {
2596
  "type": "Distributed",
2597
  "shape": [
2598
  768
 
2619
  }
2620
  ]
2621
  },
2622
+ "h.4.mlp.c_proj.weight": {
2623
  "type": "Distributed",
2624
  "shape": [
2625
  3072,
 
2651
  }
2652
  ]
2653
  },
2654
+ "h.10.ln_1.bias": {
2655
  "type": "Distributed",
2656
  "shape": [
2657
  768
 
2678
  }
2679
  ]
2680
  },
2681
+ "h.1.ln_2.weight": {
2682
  "type": "Distributed",
2683
  "shape": [
2684
  768
 
2705
  }
2706
  ]
2707
  },
2708
+ "h.10.ln_1.weight": {
2709
  "type": "Distributed",
2710
  "shape": [
2711
  768
 
2732
  }
2733
  ]
2734
  },
2735
+ "h.11.mlp.c_fc.bias": {
2736
  "type": "Distributed",
2737
  "shape": [
2738
+ 3072
 
2739
  ],
2740
  "dtype": "F32",
2741
  "chunks": [
2742
  {
2743
  "offsets": [
 
2744
  0
2745
  ],
2746
  "shape": [
2747
+ 1536
 
2748
  ],
2749
  "filename_index": 0
2750
  },
2751
  {
2752
  "offsets": [
2753
+ 1536
 
2754
  ],
2755
  "shape": [
2756
+ 1536
 
2757
  ],
2758
  "filename_index": 1
2759
  }
2760
  ]
2761
  },
2762
+ "h.10.mlp.c_fc.bias": {
2763
  "type": "Distributed",
2764
  "shape": [
2765
+ 3072
 
 
 
2766
  ],
2767
  "dtype": "F32",
2768
  "chunks": [
2769
  {
2770
  "offsets": [
 
 
 
2771
  0
2772
  ],
2773
  "shape": [
2774
+ 1536
 
 
 
2775
  ],
2776
  "filename_index": 0
2777
  },
2778
  {
2779
  "offsets": [
2780
+ 1536
 
 
 
2781
  ],
2782
  "shape": [
2783
+ 1536
 
 
 
2784
  ],
2785
  "filename_index": 1
2786
  }
2787
  ]
2788
  },
2789
+ "h.5.mlp.c_proj.weight": {
2790
  "type": "Distributed",
2791
  "shape": [
2792
+ 3072,
2793
  768
2794
  ],
2795
  "dtype": "F32",
 
2800
  0
2801
  ],
2802
  "shape": [
2803
+ 1536,
2804
  768
2805
  ],
2806
  "filename_index": 0
2807
  },
2808
  {
2809
  "offsets": [
2810
+ 1536,
2811
  0
2812
  ],
2813
  "shape": [
2814
+ 1536,
2815
  768
2816
  ],
2817
  "filename_index": 1
2818
  }
2819
  ]
2820
  },
2821
+ "h.10.attn.c_attn.bias": {
2822
  "type": "Distributed",
2823
  "shape": [
2824
  2304
 
2845
  }
2846
  ]
2847
  },
2848
+ "h.11.ln_2.bias": {
2849
  "type": "Distributed",
2850
  "shape": [
2851
+ 768
 
2852
  ],
2853
  "dtype": "F32",
2854
  "chunks": [
2855
  {
2856
  "offsets": [
 
2857
  0
2858
  ],
2859
  "shape": [
2860
+ 384
 
2861
  ],
2862
  "filename_index": 0
2863
  },
2864
  {
2865
  "offsets": [
2866
+ 384
 
2867
  ],
2868
  "shape": [
2869
+ 384
 
2870
  ],
2871
  "filename_index": 1
2872
  }
2873
  ]
2874
  },
2875
+ "h.11.mlp.c_proj.bias": {
2876
  "type": "Distributed",
2877
  "shape": [
2878
  768
 
2899
  }
2900
  ]
2901
  },
2902
+ "h.5.ln_2.bias": {
2903
  "type": "Distributed",
2904
  "shape": [
2905
  768
 
2926
  }
2927
  ]
2928
  },
2929
+ "h.4.attn.c_attn.bias": {
2930
  "type": "Distributed",
2931
  "shape": [
2932
+ 2304
2933
  ],
2934
  "dtype": "F32",
2935
  "chunks": [
 
2938
  0
2939
  ],
2940
  "shape": [
2941
+ 1152
2942
  ],
2943
  "filename_index": 0
2944
  },
2945
  {
2946
  "offsets": [
2947
+ 1152
2948
  ],
2949
  "shape": [
2950
+ 1152
2951
  ],
2952
  "filename_index": 1
2953
  }
2954
  ]
2955
  },
2956
+ "h.4.ln_2.bias": {
2957
  "type": "Distributed",
2958
  "shape": [
2959
  768
 
2980
  }
2981
  ]
2982
  },
2983
+ "h.2.attn.c_attn.bias": {
2984
  "type": "Distributed",
2985
  "shape": [
2986
+ 2304
2987
  ],
2988
  "dtype": "F32",
2989
  "chunks": [
 
2992
  0
2993
  ],
2994
  "shape": [
2995
+ 1152
2996
  ],
2997
  "filename_index": 0
2998
  },
2999
  {
3000
  "offsets": [
3001
+ 1152
3002
  ],
3003
  "shape": [
3004
+ 1152
3005
  ],
3006
  "filename_index": 1
3007
  }
3008
  ]
3009
  },
3010
+ "h.2.ln_1.weight": {
3011
  "type": "Distributed",
3012
  "shape": [
3013
+ 768
 
3014
  ],
3015
  "dtype": "F32",
3016
  "chunks": [
3017
  {
3018
  "offsets": [
 
3019
  0
3020
  ],
3021
  "shape": [
3022
+ 384
 
3023
  ],
3024
  "filename_index": 0
3025
  },
3026
  {
3027
  "offsets": [
3028
+ 384
 
3029
  ],
3030
  "shape": [
3031
+ 384
 
3032
  ],
3033
  "filename_index": 1
3034
  }
3035
  ]
3036
  },
3037
+ "h.7.ln_1.bias": {
3038
  "type": "Distributed",
3039
  "shape": [
3040
+ 768
 
 
 
3041
  ],
3042
  "dtype": "F32",
3043
  "chunks": [
3044
  {
3045
  "offsets": [
 
 
 
3046
  0
3047
  ],
3048
  "shape": [
3049
+ 384
 
 
 
3050
  ],
3051
  "filename_index": 0
3052
  },
3053
  {
3054
  "offsets": [
3055
+ 384
 
 
 
3056
  ],
3057
  "shape": [
3058
+ 384
 
 
 
3059
  ],
3060
  "filename_index": 1
3061
  }
3062
  ]
3063
  },
3064
+ "h.9.attn.c_proj.weight": {
3065
  "type": "Distributed",
3066
  "shape": [
3067
+ 768,
3068
  768
3069
  ],
3070
  "dtype": "F32",
 
3075
  0
3076
  ],
3077
  "shape": [
3078
+ 384,
3079
  768
3080
  ],
3081
  "filename_index": 0
3082
  },
3083
  {
3084
  "offsets": [
3085
+ 384,
3086
  0
3087
  ],
3088
  "shape": [
3089
+ 384,
3090
  768
3091
  ],
3092
  "filename_index": 1
3093
  }
3094
  ]
3095
  },
3096
+ "h.0.mlp.c_proj.bias": {
3097
  "type": "Distributed",
3098
  "shape": [
3099
+ 768
 
3100
  ],
3101
  "dtype": "F32",
3102
  "chunks": [
3103
  {
3104
  "offsets": [
 
3105
  0
3106
  ],
3107
  "shape": [
3108
+ 384
 
3109
  ],
3110
  "filename_index": 0
3111
  },
3112
  {
3113
  "offsets": [
3114
+ 384
 
3115
  ],
3116
  "shape": [
3117
+ 384
 
3118
  ],
3119
  "filename_index": 1
3120
  }
3121
  ]
3122
  },
3123
+ "h.7.attn.c_proj.bias": {
3124
  "type": "Distributed",
3125
  "shape": [
3126
+ 768
 
3127
  ],
3128
  "dtype": "F32",
3129
  "chunks": [
3130
  {
3131
  "offsets": [
 
3132
  0
3133
  ],
3134
  "shape": [
3135
+ 384
 
3136
  ],
3137
  "filename_index": 0
3138
  },
3139
  {
3140
  "offsets": [
3141
+ 384
 
3142
  ],
3143
  "shape": [
3144
+ 384
 
3145
  ],
3146
  "filename_index": 1
3147
  }
3148
  ]
3149
  },
3150
+ "h.9.ln_1.bias": {
3151
  "type": "Distributed",
3152
  "shape": [
3153
  768
 
3174
  }
3175
  ]
3176
  },
3177
+ "h.1.mlp.c_proj.weight": {
3178
  "type": "Distributed",
3179
  "shape": [
3180
+ 3072,
3181
  768
3182
  ],
3183
  "dtype": "F32",
3184
  "chunks": [
3185
  {
3186
  "offsets": [
3187
+ 0,
3188
  0
3189
  ],
3190
  "shape": [
3191
+ 1536,
3192
+ 768
3193
  ],
3194
  "filename_index": 0
3195
  },
3196
  {
3197
  "offsets": [
3198
+ 1536,
3199
+ 0
3200
  ],
3201
  "shape": [
3202
+ 1536,
3203
+ 768
3204
  ],
3205
  "filename_index": 1
3206
  }
3207
  ]
3208
  },
3209
+ "h.11.mlp.c_proj.weight": {
3210
  "type": "Distributed",
3211
  "shape": [
3212
+ 3072,
3213
  768
3214
  ],
3215
  "dtype": "F32",
3216
  "chunks": [
3217
  {
3218
  "offsets": [
3219
+ 0,
3220
  0
3221
  ],
3222
  "shape": [
3223
+ 1536,
3224
+ 768
3225
  ],
3226
  "filename_index": 0
3227
  },
3228
  {
3229
  "offsets": [
3230
+ 1536,
3231
+ 0
3232
  ],
3233
  "shape": [
3234
+ 1536,
3235
+ 768
3236
  ],
3237
  "filename_index": 1
3238
  }
3239
  ]
3240
  },
3241
+ "h.0.ln_1.bias": {
3242
  "type": "Distributed",
3243
  "shape": [
3244
  768
 
3265
  }
3266
  ]
3267
  },
3268
+ "h.7.mlp.c_proj.bias": {
3269
  "type": "Distributed",
3270
  "shape": [
3271
+ 768
 
3272
  ],
3273
  "dtype": "F32",
3274
  "chunks": [
3275
  {
3276
  "offsets": [
 
3277
  0
3278
  ],
3279
  "shape": [
3280
+ 384
 
3281
  ],
3282
  "filename_index": 0
3283
  },
3284
  {
3285
  "offsets": [
3286
+ 384
 
3287
  ],
3288
  "shape": [
3289
+ 384
 
3290
  ],
3291
  "filename_index": 1
3292
  }
3293
  ]
3294
  },
3295
+ "h.5.ln_1.weight": {
3296
  "type": "Distributed",
3297
  "shape": [
3298
  768
 
3319
  }
3320
  ]
3321
  },
3322
+ "wpe.weight": {
3323
  "type": "Distributed",
3324
  "shape": [
3325
+ 1024,
3326
  768
3327
  ],
3328
  "dtype": "F32",
3329
  "chunks": [
3330
  {
3331
  "offsets": [
3332
+ 0,
3333
  0
3334
  ],
3335
  "shape": [
3336
+ 1024,
3337
  384
3338
  ],
3339
  "filename_index": 0
3340
  },
3341
  {
3342
  "offsets": [
3343
+ 0,
3344
  384
3345
  ],
3346
  "shape": [
3347
+ 1024,
3348
  384
3349
  ],
3350
  "filename_index": 1
3351
  }
3352
  ]
3353
  },
3354
+ "h.5.mlp.c_fc.weight": {
3355
  "type": "Distributed",
3356
  "shape": [
3357
+ 768,
3358
+ 3072
3359
  ],
3360
  "dtype": "F32",
3361
  "chunks": [
3362
  {
3363
  "offsets": [
3364
+ 0,
3365
  0
3366
  ],
3367
  "shape": [
3368
+ 768,
3369
+ 1536
3370
  ],
3371
  "filename_index": 0
3372
  },
3373
  {
3374
  "offsets": [
3375
+ 0,
3376
+ 1536
3377
  ],
3378
  "shape": [
3379
+ 768,
3380
+ 1536
3381
  ],
3382
  "filename_index": 1
3383
  }
3384
  ]
3385
  },
3386
+ "h.2.attn.bias": {
3387
  "type": "Distributed",
3388
  "shape": [
3389
+ 1,
3390
+ 1,
3391
+ 1024,
3392
+ 1024
3393
  ],
3394
  "dtype": "F32",
3395
  "chunks": [
3396
  {
3397
  "offsets": [
3398
+ 0,
3399
+ 0,
3400
+ 0,
3401
  0
3402
  ],
3403
  "shape": [
3404
+ 1,
3405
+ 1,
3406
+ 1024,
3407
+ 512
3408
  ],
3409
  "filename_index": 0
3410
  },
3411
  {
3412
  "offsets": [
3413
+ 0,
3414
+ 0,
3415
+ 0,
3416
+ 512
3417
  ],
3418
  "shape": [
3419
+ 1,
3420
+ 1,
3421
+ 1024,
3422
+ 512
3423
  ],
3424
  "filename_index": 1
3425
  }
3426
  ]
3427
  },
3428
+ "h.9.ln_2.bias": {
3429
  "type": "Distributed",
3430
  "shape": [
3431
  768
 
3452
  }
3453
  ]
3454
  },
3455
+ "h.1.attn.c_attn.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3456
  "type": "Distributed",
3457
  "shape": [
3458
+ 2304
3459
  ],
3460
  "dtype": "F32",
3461
  "chunks": [
 
3464
  0
3465
  ],
3466
  "shape": [
3467
+ 1152
3468
  ],
3469
  "filename_index": 0
3470
  },
3471
  {
3472
  "offsets": [
3473
+ 1152
3474
  ],
3475
  "shape": [
3476
+ 1152
3477
  ],
3478
  "filename_index": 1
3479
  }
3480
  ]
3481
  },
3482
+ "h.9.attn.bias": {
3483
  "type": "Distributed",
3484
  "shape": [
3485
  1,
 
3521
  }
3522
  ]
3523
  },
3524
+ "h.0.attn.c_attn.weight": {
3525
  "type": "Distributed",
3526
  "shape": [
3527
+ 768,
3528
+ 2304
 
 
3529
  ],
3530
  "dtype": "F32",
3531
  "chunks": [
3532
  {
3533
  "offsets": [
 
 
3534
  0,
3535
  0
3536
  ],
3537
  "shape": [
3538
+ 768,
3539
+ 1152
 
 
3540
  ],
3541
  "filename_index": 0
3542
  },
3543
  {
3544
  "offsets": [
3545
  0,
3546
+ 1152
 
 
3547
  ],
3548
  "shape": [
3549
+ 768,
3550
+ 1152
 
 
3551
  ],
3552
  "filename_index": 1
3553
  }
3554
  ]
3555
  },
3556
+ "h.0.ln_2.weight": {
3557
  "type": "Distributed",
3558
  "shape": [
3559
+ 768
3560
  ],
3561
  "dtype": "F32",
3562
  "chunks": [
 
3565
  0
3566
  ],
3567
  "shape": [
3568
+ 384
3569
  ],
3570
  "filename_index": 0
3571
  },
3572
  {
3573
  "offsets": [
3574
+ 384
3575
  ],
3576
  "shape": [
3577
+ 384
3578
  ],
3579
  "filename_index": 1
3580
  }
3581
  ]
3582
  },
3583
+ "h.5.ln_2.weight": {
3584
  "type": "Distributed",
3585
  "shape": [
3586
  768
 
3607
  }
3608
  ]
3609
  },
3610
+ "h.6.mlp.c_fc.weight": {
3611
  "type": "Distributed",
3612
  "shape": [
3613
+ 768,
3614
+ 3072
3615
  ],
3616
  "dtype": "F32",
3617
  "chunks": [
3618
  {
3619
  "offsets": [
3620
+ 0,
3621
  0
3622
  ],
3623
  "shape": [
3624
+ 768,
3625
+ 1536
3626
  ],
3627
  "filename_index": 0
3628
  },
3629
  {
3630
  "offsets": [
3631
+ 0,
3632
+ 1536
3633
  ],
3634
  "shape": [
3635
+ 768,
3636
+ 1536
3637
  ],
3638
  "filename_index": 1
3639
  }
3640
  ]
3641
  },
3642
+ "h.3.mlp.c_proj.bias": {
3643
  "type": "Distributed",
3644
  "shape": [
3645
+ 768
 
3646
  ],
3647
  "dtype": "F32",
3648
  "chunks": [
3649
  {
3650
  "offsets": [
 
3651
  0
3652
  ],
3653
  "shape": [
3654
+ 384
 
3655
  ],
3656
  "filename_index": 0
3657
  },
3658
  {
3659
  "offsets": [
3660
+ 384
 
3661
  ],
3662
  "shape": [
3663
+ 384
 
3664
  ],
3665
  "filename_index": 1
3666
  }
3667
  ]
3668
  },
3669
+ "h.10.ln_2.weight": {
3670
  "type": "Distributed",
3671
  "shape": [
3672
+ 768
 
3673
  ],
3674
  "dtype": "F32",
3675
  "chunks": [
3676
  {
3677
  "offsets": [
 
3678
  0
3679
  ],
3680
  "shape": [
3681
+ 384
 
3682
  ],
3683
  "filename_index": 0
3684
  },
3685
  {
3686
  "offsets": [
3687
+ 384
 
3688
  ],
3689
  "shape": [
3690
+ 384
 
3691
  ],
3692
  "filename_index": 1
3693
  }
3694
  ]
3695
  },
3696
+ "h.1.attn.bias": {
3697
  "type": "Distributed",
3698
  "shape": [
3699
+ 1,
3700
+ 1,
3701
+ 1024,
3702
+ 1024
3703
  ],
3704
  "dtype": "F32",
3705
  "chunks": [
3706
  {
3707
  "offsets": [
3708
+ 0,
3709
+ 0,
3710
  0,
3711
  0
3712
  ],
3713
  "shape": [
3714
+ 1,
3715
+ 1,
3716
+ 1024,
3717
+ 512
3718
  ],
3719
  "filename_index": 0
3720
  },
3721
  {
3722
  "offsets": [
3723
  0,
3724
+ 0,
3725
+ 0,
3726
+ 512
3727
  ],
3728
  "shape": [
3729
+ 1,
3730
+ 1,
3731
+ 1024,
3732
+ 512
3733
  ],
3734
  "filename_index": 1
3735
  }
3736
  ]
3737
  },
3738
+ "h.1.ln_1.bias": {
3739
  "type": "Distributed",
3740
  "shape": [
3741
  768
 
3762
  }
3763
  ]
3764
  },
3765
+ "h.4.attn.c_proj.bias": {
3766
  "type": "Distributed",
3767
  "shape": [
3768
  768
 
3789
  }
3790
  ]
3791
  },
3792
+ "h.2.attn.c_attn.weight": {
3793
  "type": "Distributed",
3794
  "shape": [
3795
+ 768,
3796
+ 2304
3797
  ],
3798
  "dtype": "F32",
3799
  "chunks": [
3800
  {
3801
  "offsets": [
3802
+ 0,
3803
  0
3804
  ],
3805
  "shape": [
3806
+ 768,
3807
+ 1152
3808
  ],
3809
  "filename_index": 0
3810
  },
3811
  {
3812
  "offsets": [
3813
+ 0,
3814
+ 1152
3815
  ],
3816
  "shape": [
3817
+ 768,
3818
+ 1152
3819
  ],
3820
  "filename_index": 1
3821
  }
3822
  ]
3823
  },
3824
+ "h.0.ln_2.bias": {
3825
  "type": "Distributed",
3826
  "shape": [
3827
  768
 
3848
  }
3849
  ]
3850
  },
3851
+ "h.9.attn.c_proj.bias": {
3852
  "type": "Distributed",
3853
  "shape": [
 
3854
  768
3855
  ],
3856
  "dtype": "F32",
3857
  "chunks": [
3858
  {
3859
  "offsets": [
 
3860
  0
3861
  ],
3862
  "shape": [
3863
+ 384
 
3864
  ],
3865
  "filename_index": 0
3866
  },
3867
  {
3868
  "offsets": [
3869
+ 384
 
3870
  ],
3871
  "shape": [
3872
+ 384
 
3873
  ],
3874
  "filename_index": 1
3875
  }
3876
  ]
3877
  },
3878
+ "h.5.attn.c_proj.bias": {
3879
  "type": "Distributed",
3880
  "shape": [
 
3881
  768
3882
  ],
3883
  "dtype": "F32",
3884
  "chunks": [
3885
  {
3886
  "offsets": [
 
3887
  0
3888
  ],
3889
  "shape": [
3890
+ 384
 
3891
  ],
3892
  "filename_index": 0
3893
  },
3894
  {
3895
  "offsets": [
3896
+ 384
 
3897
  ],
3898
  "shape": [
3899
+ 384
 
3900
  ],
3901
  "filename_index": 1
3902
  }
3903
  ]
3904
  },
3905
+ "h.9.ln_1.weight": {
3906
  "type": "Distributed",
3907
  "shape": [
3908
  768
 
3929
  }
3930
  ]
3931
  },
3932
+ "h.4.mlp.c_proj.bias": {
3933
  "type": "Distributed",
3934
  "shape": [
3935
  768
 
3956
  }
3957
  ]
3958
  },
3959
+ "h.2.attn.c_proj.weight": {
3960
  "type": "Distributed",
3961
  "shape": [
3962
+ 768,
3963
  768
3964
  ],
3965
  "dtype": "F32",
3966
  "chunks": [
3967
  {
3968
  "offsets": [
3969
+ 0,
3970
  0
3971
  ],
3972
  "shape": [
3973
+ 384,
3974
+ 768
3975
  ],
3976
  "filename_index": 0
3977
  },
3978
  {
3979
  "offsets": [
3980
+ 384,
3981
+ 0
3982
  ],
3983
  "shape": [
3984
+ 384,
3985
+ 768
3986
  ],
3987
  "filename_index": 1
3988
  }
3989
  ]
3990
  },
3991
+ "h.7.attn.c_proj.weight": {
3992
  "type": "Distributed",
3993
  "shape": [
3994
+ 768,
3995
  768
3996
  ],
3997
  "dtype": "F32",
3998
  "chunks": [
3999
  {
4000
  "offsets": [
4001
+ 0,
4002
  0
4003
  ],
4004
  "shape": [
4005
+ 384,
4006
+ 768
4007
  ],
4008
  "filename_index": 0
4009
  },
4010
  {
4011
  "offsets": [
4012
+ 384,
4013
+ 0
4014
  ],
4015
  "shape": [
4016
+ 384,
4017
+ 768
4018
  ],
4019
  "filename_index": 1
4020
  }
4021
  ]
4022
  },
4023
+ "h.2.mlp.c_fc.bias": {
4024
  "type": "Distributed",
4025
  "shape": [
4026
+ 3072
4027
  ],
4028
  "dtype": "F32",
4029
  "chunks": [
 
4032
  0
4033
  ],
4034
  "shape": [
4035
+ 1536
4036
  ],
4037
  "filename_index": 0
4038
  },
4039
  {
4040
  "offsets": [
4041
+ 1536
4042
  ],
4043
  "shape": [
4044
+ 1536
4045
  ],
4046
  "filename_index": 1
4047
  }
4048
  ]
4049
  },
4050
+ "h.7.mlp.c_fc.bias": {
4051
  "type": "Distributed",
4052
  "shape": [
4053
+ 3072
 
 
 
4054
  ],
4055
  "dtype": "F32",
4056
  "chunks": [
4057
  {
4058
  "offsets": [
 
 
 
4059
  0
4060
  ],
4061
  "shape": [
4062
+ 1536
 
 
 
4063
  ],
4064
  "filename_index": 0
4065
  },
4066
  {
4067
  "offsets": [
4068
+ 1536
 
 
 
4069
  ],
4070
  "shape": [
4071
+ 1536
 
 
 
4072
  ],
4073
  "filename_index": 1
4074
  }
4075
  ]
4076
  },
4077
+ "h.2.ln_2.bias": {
4078
  "type": "Distributed",
4079
  "shape": [
4080
  768
 
4101
  }
4102
  ]
4103
  },
4104
+ "h.6.attn.c_attn.bias": {
4105
  "type": "Distributed",
4106
  "shape": [
 
4107
  2304
4108
  ],
4109
  "dtype": "F32",
4110
  "chunks": [
4111
  {
4112
  "offsets": [
 
4113
  0
4114
  ],
4115
  "shape": [
 
4116
  1152
4117
  ],
4118
  "filename_index": 0
4119
  },
4120
  {
4121
  "offsets": [
 
4122
  1152
4123
  ],
4124
  "shape": [
 
4125
  1152
4126
  ],
4127
  "filename_index": 1
4128
  }
4129
  ]
4130
  },
4131
+ "h.6.mlp.c_fc.bias": {
4132
  "type": "Distributed",
4133
  "shape": [
4134
+ 3072
 
4135
  ],
4136
  "dtype": "F32",
4137
  "chunks": [
4138
  {
4139
  "offsets": [
 
4140
  0
4141
  ],
4142
  "shape": [
4143
+ 1536
 
4144
  ],
4145
  "filename_index": 0
4146
  },
4147
  {
4148
  "offsets": [
4149
+ 1536
 
4150
  ],
4151
  "shape": [
4152
+ 1536
 
4153
  ],
4154
  "filename_index": 1
4155
  }
4156
  ]
4157
  },
4158
+ "h.0.mlp.c_fc.bias": {
4159
  "type": "Distributed",
4160
  "shape": [
4161
+ 3072
 
4162
  ],
4163
  "dtype": "F32",
4164
  "chunks": [
4165
  {
4166
  "offsets": [
 
4167
  0
4168
  ],
4169
  "shape": [
4170
+ 1536
 
4171
  ],
4172
  "filename_index": 0
4173
  },
4174
  {
4175
  "offsets": [
4176
+ 1536
 
4177
  ],
4178
  "shape": [
4179
+ 1536
 
4180
  ],
4181
  "filename_index": 1
4182
  }
4183
  ]
4184
  },
4185
+ "h.4.mlp.c_fc.weight": {
4186
  "type": "Distributed",
4187
  "shape": [
4188
+ 768,
4189
+ 3072
 
 
4190
  ],
4191
  "dtype": "F32",
4192
  "chunks": [
4193
  {
4194
  "offsets": [
 
 
4195
  0,
4196
  0
4197
  ],
4198
  "shape": [
4199
+ 768,
4200
+ 1536
 
 
4201
  ],
4202
  "filename_index": 0
4203
  },
4204
  {
4205
  "offsets": [
4206
  0,
4207
+ 1536
 
 
4208
  ],
4209
  "shape": [
4210
+ 768,
4211
+ 1536
 
 
4212
  ],
4213
  "filename_index": 1
4214
  }
4215
  ]
4216
  },
4217
+ "h.11.attn.c_proj.weight": {
4218
  "type": "Distributed",
4219
  "shape": [
4220
+ 768,
4221
+ 768
4222
  ],
4223
  "dtype": "F32",
4224
  "chunks": [
4225
  {
4226
  "offsets": [
4227
+ 0,
4228
  0
4229
  ],
4230
  "shape": [
4231
+ 384,
4232
+ 768
4233
  ],
4234
  "filename_index": 0
4235
  },
4236
  {
4237
  "offsets": [
4238
+ 384,
4239
+ 0
4240
  ],
4241
  "shape": [
4242
+ 384,
4243
+ 768
4244
  ],
4245
  "filename_index": 1
4246
  }
4247
  ]
4248
  },
4249
+ "h.6.ln_1.bias": {
4250
  "type": "Distributed",
4251
  "shape": [
4252
+ 768
4253
  ],
4254
  "dtype": "F32",
4255
  "chunks": [
 
4258
  0
4259
  ],
4260
  "shape": [
4261
+ 384
4262
  ],
4263
  "filename_index": 0
4264
  },
4265
  {
4266
  "offsets": [
4267
+ 384
4268
  ],
4269
  "shape": [
4270
+ 384
4271
  ],
4272
  "filename_index": 1
4273
  }
4274
  ]
4275
  },
4276
+ "h.0.mlp.c_proj.weight": {
4277
  "type": "Distributed",
4278
  "shape": [
4279
  3072,
 
4305
  }
4306
  ]
4307
  },
4308
+ "h.8.attn.c_attn.bias": {
4309
  "type": "Distributed",
4310
  "shape": [
4311
+ 2304
 
4312
  ],
4313
  "dtype": "F32",
4314
  "chunks": [
4315
  {
4316
  "offsets": [
 
4317
  0
4318
  ],
4319
  "shape": [
4320
+ 1152
 
4321
  ],
4322
  "filename_index": 0
4323
  },
4324
  {
4325
  "offsets": [
4326
+ 1152
 
4327
  ],
4328
  "shape": [
4329
+ 1152
 
4330
  ],
4331
  "filename_index": 1
4332
  }
4333
  ]
4334
  },
4335
+ "h.7.ln_2.bias": {
4336
  "type": "Distributed",
4337
  "shape": [
4338
  768
 
4359
  }
4360
  ]
4361
  },
4362
+ "h.0.attn.c_attn.bias": {
4363
  "type": "Distributed",
4364
  "shape": [
4365
+ 2304
4366
  ],
4367
  "dtype": "F32",
4368
  "chunks": [
 
4371
  0
4372
  ],
4373
  "shape": [
4374
+ 1152
4375
  ],
4376
  "filename_index": 0
4377
  },
4378
  {
4379
  "offsets": [
4380
+ 1152
4381
  ],
4382
  "shape": [
4383
+ 1152
4384
  ],
4385
  "filename_index": 1
4386
  }
4387
  ]
4388
  },
4389
+ "h.9.mlp.c_proj.bias": {
4390
  "type": "Distributed",
4391
  "shape": [
4392
  768
 
4413
  }
4414
  ]
4415
  },
4416
+ "h.11.ln_1.bias": {
4417
  "type": "Distributed",
4418
  "shape": [
4419
  768
 
4440
  }
4441
  ]
4442
  },
4443
+ "h.4.attn.bias": {
4444
+ "type": "Distributed",
4445
+ "shape": [
4446
+ 1,
4447
+ 1,
4448
+ 1024,
4449
+ 1024
4450
+ ],
4451
+ "dtype": "F32",
4452
+ "chunks": [
4453
+ {
4454
+ "offsets": [
4455
+ 0,
4456
+ 0,
4457
+ 0,
4458
+ 0
4459
+ ],
4460
+ "shape": [
4461
+ 1,
4462
+ 1,
4463
+ 1024,
4464
+ 512
4465
+ ],
4466
+ "filename_index": 0
4467
+ },
4468
+ {
4469
+ "offsets": [
4470
+ 0,
4471
+ 0,
4472
+ 0,
4473
+ 512
4474
+ ],
4475
+ "shape": [
4476
+ 1,
4477
+ 1,
4478
+ 1024,
4479
+ 512
4480
+ ],
4481
+ "filename_index": 1
4482
+ }
4483
+ ]
4484
+ },
4485
+ "h.1.ln_1.weight": {
4486
  "type": "Distributed",
4487
  "shape": [
4488
  768
 
4509
  }
4510
  ]
4511
  },
4512
+ "h.6.attn.c_proj.bias": {
4513
  "type": "Distributed",
4514
  "shape": [
4515
+ 768
 
4516
  ],
4517
  "dtype": "F32",
4518
  "chunks": [
4519
  {
4520
  "offsets": [
 
4521
  0
4522
  ],
4523
  "shape": [
4524
+ 384
 
4525
  ],
4526
  "filename_index": 0
4527
  },
4528
  {
4529
  "offsets": [
4530
+ 384
 
4531
  ],
4532
  "shape": [
4533
+ 384
 
4534
  ],
4535
  "filename_index": 1
4536
  }
4537
  ]
4538
  },
4539
+ "h.1.attn.c_attn.weight": {
4540
  "type": "Distributed",
4541
  "shape": [
4542
  768,
4543
+ 2304
4544
  ],
4545
  "dtype": "F32",
4546
  "chunks": [
 
4550
  0
4551
  ],
4552
  "shape": [
4553
+ 768,
4554
+ 1152
4555
  ],
4556
  "filename_index": 0
4557
  },
4558
  {
4559
  "offsets": [
4560
+ 0,
4561
+ 1152
4562
  ],
4563
  "shape": [
4564
+ 768,
4565
+ 1152
4566
  ],
4567
  "filename_index": 1
4568
  }
4569
  ]
4570
  },
4571
+ "h.6.attn.bias": {
4572
  "type": "Distributed",
4573
  "shape": [
4574
+ 1,
4575
+ 1,
4576
+ 1024,
4577
+ 1024
4578
  ],
4579
  "dtype": "F32",
4580
  "chunks": [
4581
  {
4582
  "offsets": [
4583
+ 0,
4584
+ 0,
4585
+ 0,
4586
  0
4587
  ],
4588
  "shape": [
4589
+ 1,
4590
+ 1,
4591
+ 1024,
4592
+ 512
4593
  ],
4594
  "filename_index": 0
4595
  },
4596
  {
4597
  "offsets": [
4598
+ 0,
4599
+ 0,
4600
+ 0,
4601
+ 512
4602
  ],
4603
  "shape": [
4604
+ 1,
4605
+ 1,
4606
+ 1024,
4607
+ 512
4608
  ],
4609
  "filename_index": 1
4610
  }
4611
  ]
4612
  },
4613
+ "h.6.mlp.c_proj.bias": {
4614
  "type": "Distributed",
4615
  "shape": [
 
4616
  768
4617
  ],
4618
  "dtype": "F32",
4619
  "chunks": [
4620
  {
4621
  "offsets": [
 
4622
  0
4623
  ],
4624
  "shape": [
4625
+ 384
 
4626
  ],
4627
  "filename_index": 0
4628
  },
4629
  {
4630
  "offsets": [
4631
+ 384
 
4632
  ],
4633
  "shape": [
4634
+ 384
 
4635
  ],
4636
  "filename_index": 1
4637
  }
4638
  ]
4639
  },
4640
+ "h.4.attn.c_attn.weight": {
4641
  "type": "Distributed",
4642
  "shape": [
4643
  768,
 
4669
  }
4670
  ]
4671
  },
4672
+ "h.11.attn.c_proj.bias": {
4673
  "type": "Distributed",
4674
  "shape": [
4675
+ 768
 
4676
  ],
4677
  "dtype": "F32",
4678
  "chunks": [
4679
  {
4680
  "offsets": [
 
4681
  0
4682
  ],
4683
  "shape": [
4684
+ 384
 
4685
  ],
4686
  "filename_index": 0
4687
  },
4688
  {
4689
  "offsets": [
4690
+ 384
 
4691
  ],
4692
  "shape": [
4693
+ 384
 
4694
  ],
4695
  "filename_index": 1
4696
  }
4697
  ]
4698
  },
4699
+ "h.2.ln_1.bias": {
4700
  "type": "Distributed",
4701
  "shape": [
4702
+ 768
 
 
 
4703
  ],
4704
  "dtype": "F32",
4705
  "chunks": [
4706
  {
4707
  "offsets": [
 
 
 
4708
  0
4709
  ],
4710
  "shape": [
4711
+ 384
 
 
 
4712
  ],
4713
  "filename_index": 0
4714
  },
4715
  {
4716
  "offsets": [
4717
+ 384
 
 
 
4718
  ],
4719
  "shape": [
4720
+ 384
 
 
 
4721
  ],
4722
  "filename_index": 1
4723
  }
4724
  ]
4725
  },
4726
+ "h.5.mlp.c_fc.bias": {
4727
  "type": "Distributed",
4728
  "shape": [
4729
+ 3072
4730
  ],
4731
  "dtype": "F32",
4732
  "chunks": [
 
4735
  0
4736
  ],
4737
  "shape": [
4738
+ 1536
4739
  ],
4740
  "filename_index": 0
4741
  },
4742
  {
4743
  "offsets": [
4744
+ 1536
4745
  ],
4746
  "shape": [
4747
+ 1536
4748
  ],
4749
  "filename_index": 1
4750
  }