| { | |
| "metadata": { | |
| "ParamSize": 149, | |
| "ParamBytes": 326074368.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 77194752, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 50257, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 77194752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8aa629e8739ff337f4983e485d0a145e" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 77194752, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.weight", | |
| "shape": [ | |
| 50257, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 77194752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8aa629e8739ff337f4983e485d0a145e" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33470976, | |
| "records": [ | |
| { | |
| "name": "transformer.wpe.weight", | |
| "shape": [ | |
| 1024, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1574400 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 1575936 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 5114880 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 5119488 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 6299136 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 6300672 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 6302208 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6303744 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 11022336 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 11028480 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15747072 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15748608 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15750144 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 15751680 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 19290624 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 19295232 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 20474880 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 20476416 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 20477952 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 20479488 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 25198080 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 25204224 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29922816 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29924352 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29925888 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 29927424 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 33466368 | |
| } | |
| ], | |
| "md5sum": "748d75aca928d18aac58d8f9301c3bcb" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29535744, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1181184 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1182720 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 1184256 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 5902848 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 5908992 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 10627584 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 10629120 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 10630656 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 10632192 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 14171136 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15355392 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15356928 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15358464 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 15360000 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 20078592 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 20084736 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24803328 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24804864 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24806400 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 24807936 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 28346880 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29531136 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29532672 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29534208 | |
| } | |
| ], | |
| "md5sum": "94d1cea00ea6a1492cb7bb23eb5d64b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33076224, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4724736 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9443328 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9444864 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9446400 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 9447936 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12986880 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 12991488 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14171136 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14172672 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14174208 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18894336 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18900480 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23619072 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23620608 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23622144 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 23623680 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 27162624 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 27167232 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28346880 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28348416 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28349952 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33070080 | |
| } | |
| ], | |
| "md5sum": "43957cf0e0da3bc016aa35099e8a4e53" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33074688, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4720128 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4721664 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 4723200 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 8262144 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 8266752 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9446400 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9447936 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9451008 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 14169600 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18894336 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18895872 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18897408 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 22437888 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 22442496 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23622144 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23623680 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23625216 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23626752 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28345344 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33070080 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33071616 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33073152 | |
| } | |
| ], | |
| "md5sum": "4cbbe2cce00b37c72cd95d22b3aa03d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33079296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 3543552 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4723200 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4724736 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4726272 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4727808 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 9446400 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9452544 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14171136 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14172672 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14174208 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17714688 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 17719296 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18900480 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18902016 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18903552 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23622144 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23628288 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28346880 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28348416 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28349952 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 31890432 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 31895040 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33074688 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33076224 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33077760 | |
| } | |
| ], | |
| "md5sum": "4f1aa298f8b0af477effc227d22f5ef1" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 9447936, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4724736 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9443328 | |
| }, | |
| { | |
| "name": "transformer.ln_f.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9444864 | |
| }, | |
| { | |
| "name": "transformer.ln_f.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9446400 | |
| } | |
| ], | |
| "md5sum": "1532478e2b064b31091ff718a8c67188" | |
| } | |
| ] | |
| } |