Qwen2.5-Coder-14B-Instruct-q4f32_1-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Upload folder using huggingface_hub
a5a2d34 verified
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 9234108416.0,
"BitsPerParam": 5.001536828453907
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "21fec9dbca9130f963de9fb5a2600f94"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "f6e1145a931adb39368d3482afc28060"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d6b0b9b3c85716761b3c968eca6a51c0"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5ac0e57616cce117dd3f0d4c14225f12"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8dce9eef02ece331f97cb8b9cdcd356d"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4fb99aa9c47d77fc4a7e03c9b8477ae0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c4cdc1471a8d862aa36fdc0a7ebd16e3"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 28891136,
"records": [
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 10240
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4433920
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13281280
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13291520
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 13301760
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 17725440
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 26572800
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 26583040
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 26597376
}
],
"md5sum": "d60783305f83f83941448e82706ceac1"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "86e77edd47eafefb8a8456965e6bee29"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6c53ddb70cfd1aa9e172e48f0c725108"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "46677454d131ca67618e93b458f91ffa"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "048a1298997f118cf64e7c9c6d586b17"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "18dddd0f1919337ebf3afc3003843a4a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "29424eba7cc326df5fc52005ef6cda0c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "173754727c1255a6dffe87d5e0febf1b"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "448390dd622190054cee2126d71d2369"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a68831bf80170147895585cb628ba20e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e94e57ffa1c7b89d7abcdccb93c4d33d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "91db90df869f4c39abf891d72a3a0ea2"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "264bd26b4f77b8915649aaf134d5b294"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "736275bf52f8223b152e48f7d5c03d3b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "534cefca621192268d11f4ae4bd9a5e1"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "9382ba8e3686f0c608027aef4660caa7"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "062ef2d15bc7c7476b7281e6452e0ded"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "d1847bf1f9f63d5248b392d08c695f9e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "4c8080b110e35084cbc54f94efc7c927"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0985eee5735e2e7e2af87a3f85cd5381"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0e38df8f97bb5e475b27faf0e6e6aedc"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "84e0ea205875f3db1fa16b182f9750ec"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 30355456,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14755840
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14766080
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19189760
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28037120
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28047360
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28061696
}
],
"md5sum": "61924c15b911411baeb4328117582155"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6d73613d44bb8b08b376db3ddd928954"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "edaceaa2b19b1de0959babf498c52446"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4d7cad6104eedbe3f0b7046cb19224db"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "44f39fe8f500f4574a68d5ee9709e8ab"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "22e24085402165170d3c18413acfcfdb"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "22108c37a8562d73d8d1d47ed33847a2"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "575e8c85074d36fcb384c2f5e77c5fad"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "e29a1387d32d06633c9d1f779cda7dfc"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "55d3bf7db621b6bdfb73a6553c391829"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3290e43b8919d28cbc676ac1f53f426c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "693aee923db022ce559083bf2bfef7e6"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "7022cc94afe6e3496fe6aeada766646c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7a9535c42a9b461ba54e8aaa72e8f14b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0828ec5a716ad1b3d81c4ad44ac12d33"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "da355eaaa48f79c4026f12db81786d60"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1a1566e402f13f50f94cea7b25cf8098"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "077896afc42eca087eaa53daeffd992b"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c44168fea3948c10a0c296ebe86a6f96"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "08a6c6e6b0497e99eb1961681e4a3bff"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "c03fb366d36f772f6a3db6677c791a68"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33110016,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 14745600
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14759936
}
],
"md5sum": "15562e954e417542b466c7ebba736fee"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "eced1495325f8186d908b3e7529e94d0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b57f35e07b2150044beeaf3e93f90fa7"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c0a5aa5f3315c7d21675846de81d407e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "9cb91dd8ac24e1c5eb7181864b70c6b0"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "83d70661cecef1a9b1a3152d01626cb3"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7d421dfd64ad40d41194dd5c8986598e"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "71baccb207d951f2d0e9748c21abdce0"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "2225816834ab1d67bcd520f0de8f5376"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a401d69c388a3dfbcefc0ae41144abcc"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6259aeacdc878dc30476020da693ba81"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "55375cd6c8f070e84b7fcf3524e6e1a0"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "f720babd7ddd6550db15278c589d5119"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "dd4dd53f144a550bea42d65a0dd5710c"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3cc2ca2d234d5f5429002466cc547f4c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1f9b31116f28212530d75e22d91c57e3"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "164a1ad0cd706cea434c0010c0ab8c7d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b9fc6d022a499b65b46cd06071b28b38"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2a26e6c3d7a9fb4d6f3aa86404835efa"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "942c9ca736c7022c71628a848827e5be"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "f877d80f85829cfa2ce6f2b4fd745418"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33110016,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 14745600
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14759936
}
],
"md5sum": "feb4f9de281cc8aa299859bcd88ae195"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6d138f8845da9b288721ff66a1c2e2fe"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c1b4d0d58fb1972c6a1d69b7677b7740"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "af8cd5d34e5b7c94fc55a6d03540f320"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 30341120,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30330880
}
],
"md5sum": "2651e2555eae3a74f9ca314cd25f27f3"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9cf3711f1f0b9fca49b3a663ec7441aa"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 31645696,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 13281280
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13295616
}
],
"md5sum": "775440dd1fe7a70913a6886acc6b1cc9"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "74cb82f04a69562b5e4579bc75b9c097"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "46fd38ce6ad1a126fcb4bc4bdd5774dd"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "eaaffa61e9d6b02570fbf9727f8d1943"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "5dc744b7aa713e0519026f2eba687045"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e0cae0641ba5830b9b815be1602fc9da"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "78c475a964cb63753012b4b761d474b9"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "99f045bb7023be9a884eeead83a7e974"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "eebc4b15673e3f0bf9bdca7e4acf6c45"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "db46cbfe53b384de5892e4dae0d2a528"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "93ef9d2939ce610a0f7da0d42db0c642"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d000b69430b913806b0c585d0fa72af7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b0a41523c558f542560ce0e8ea41a716"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 32471040,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28037120
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28047360
}
],
"md5sum": "2a46083f0e1c9f107e7342499bd9abee"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29515776,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 8857600
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 8871936
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 27222016
}
],
"md5sum": "016b09807bffd68a92976930b9d8d298"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e3d9dda56a5c8594e7ceedf7d4d24e14"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "cdee9aadd972b9e06987610dc6bbc2d5"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4edba327093368bbb77240e95a2174f5"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6e960a3ec8d1596749ba3eb9b239f5f6"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "12ffcb40bf990074dcaff3773e216ed6"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e8f79f787548c0328645ffc8e3a66e99"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "417dec0903bd372d72defed66cada8cb"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "69424ca34f7840db36bf4f73e1a4c012"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f3445da8fa9a2426837661b6341d166c"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2512566ed0ebf67ff6be2400db9d33fd"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "fa76ac245b52c99013dd3aa8b1dcf997"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "3ba5a7c4cbeefd5519380128dd1f6339"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "206ac6edfa388b698a30e009f42e6fda"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bb00fc2ee7062bb5b0bdd988ac22211d"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d3d9c4d81faf86c17eadc1a2d8c8becc"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "736d04a8f5271e5b07ffa8c6f66fc500"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e4b0e65ac23684136fc971f416c04518"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6571a990cfa409a67f2e6015ca3a04dc"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b82dba03c4faa3d5f18091157d8092dd"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "78549688f64c6712e085b98aa53326ad"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f779451c3cfa33002f44d3cdf004169f"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a1c98b6328371bd075cd8ae24862c86d"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "eb3866802671e07fb64ef41eb15afb76"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "b2832263b6e6e762581125cabc26ca79"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "10719bd033b7e6ff40633180c8e3206d"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0d471400023ba1d6e0e7e338908c5c74"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "fb0e4c5d64943a7147cdfcb9fe3d8871"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "653e3c73140f57ebfc57a87b7f5a1509"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 33110016,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 14745600
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14759936
}
],
"md5sum": "ff81852804d1ddd7cfa56941570c31ba"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6e0a1da951b825473de0f1fb9fbab79f"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5ff40f6459169f6f98d665b9d7561a24"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9bb7e6230a361aaeeff3c8667357af29"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 30341120,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30330880
}
],
"md5sum": "39edb461b2729f3c81df2c5c5cd9e504"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3ff952f957124d4cf75431b13c840017"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 31645696,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 13281280
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13295616
}
],
"md5sum": "55ee2d855ecc1cae8f63b2f2438be13b"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2f8b29731ae04791bd7dc4f125164260"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ad16838457be241bc2bd6900b6e33662"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "afe3ef38c929ec72d0fc30bb699958b1"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "1828b96cb17aa0113a6407a26e933954"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7c2319ed6fb03cf0bd83debca2bbcca0"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ed7aef2fc1a130a6daa88cac52056d8d"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d831c3cc35860dea0d8b341ffc819e02"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "d6dc0ed8c1d55d5f5f864fb66339d525"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b5fa22a490c8953c37652d91a6ab116a"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4e10ee74d13936236e0f737275cae376"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4218104fe894cd1bc4e099ca95a91a54"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "b1a8ce8d0eebd35fb8ece18a921c3f62"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a5c3f87825ba98f6f2d397cfa3f5b0a4"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b4895fa7d8ba921a5e6d070d15f174f8"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f66d446d5c666f3bb48df96838ec3e77"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "7107c7ea442802df6d2d6e0f29704afd"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7cf953395ef60ce3b7880389dfe5cf10"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c5a45ef5168f4b7d6170ae0095cd5341"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5e3bf5350127a20ce047da9bc35bdac6"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "177549074b4ce442caa8aa3cf90ef925"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5034e22deb0c8c33f4deca4f45d3efa4"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d67ff8cfafe87ff98d227207a7c3ef25"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b676dbf85c641047d28baeb7add9f3f5"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "4c311a7910b830482205905cccc15385"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d00eb9730fead41279ebe70b314f0a10"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d1ac34f88796bb7b10ead9c55e9a84e6"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3974c2837f3dfbafb77d555e5a226501"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "add2b25e780dbdf32b1715aed72b9415"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33110016,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 14745600
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14759936
}
],
"md5sum": "af0f6d6b2976a6c5d1110364b6637f91"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a984fa1f9bafcf9bfa3c073f24edc4dc"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6b1ed33cee0d27d5b1937073213f740c"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0631c9875afdd146b983ff6613badf1e"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 30341120,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30330880
}
],
"md5sum": "3ea094a6f93ab198d30df31d25e412ee"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f3377a6840a4ba511024c2dca66868b8"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 31645696,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 13281280
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13295616
}
],
"md5sum": "a1835a643b7a63e800386faf0a1b4fbc"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "453b76b7d6eb65187b67fdd68f97f18d"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f5758cd6aa53151387e796f863619cb4"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a9827dedc6b6d29456b09ef39a5a89c4"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "da92ba883759c5185bb9d14c2c5401d1"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bc0bca40b15751989caaed235db6a13b"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "64c69e7a24ac133daaec8551abb6cbbd"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f3143e7c2a7b782ebbfb363db2bd9b3b"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "9d9dc8a501a51789f40741d385522bf0"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "85e65053a41abf976e4c32058d16cd84"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0bbe50dbfc8c7244c140d7038dfe5e91"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a90cffaeaf152ee6c12378aec9868edd"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "bca6e012ce42eb5eac30ac2dff2f6d41"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b37ff0a7333b40e0c6ca6e92df357675"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "79676c3ed23f9c8cec0980b32bddb5ec"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2b0c003178fa61575a31259af76c879f"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1437e0d6df954f2f861e416415fca90c"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "63f1d3c8a85d2686712ff2c6d0c4d040"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "308e8773520c90a88504fa187e96ef00"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e390ea8e46997b6775068215de25b200"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "df4ef75283bb5a517695e52a659b3a76"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0d0150ec8b2ce062cede337cecddc093"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "dda518c624c0372f7e0d57c4842a3429"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "335bb46bb55efad78bb946693fc4b847"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "35f2dc69e2d4521cf645a01d55ae0ab2"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0287f43ad7df8064c3db4b7fcbd608a2"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4c643266a33dbf0de87a295d76f26d04"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "6f94fc5b1539a0dff380d9a4f52196d6"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "44c5aa4aff5d116501e4b2fac2c11bd5"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33110016,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14336,
"byteOffset": 14745600
},
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14759936
}
],
"md5sum": "7b4d44d69c89e30b172130dd12c1a1a0"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 17039360,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 15400960
}
],
"md5sum": "0984f63d235a587b4b6cdc66c3568ed5"
}
]
}