| { | |
| "metadata": { | |
| "ParamSize": 194, | |
| "ParamBytes": 7672043520.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1229193216, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.weight", | |
| "shape": [ | |
| 200064, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1229193216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a6a0f770aa96e933898a58fd50c5d637" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9cfd2557e123b19d11695e7260fb0915" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8b37bf7d7e8e9985737a2243b540d8cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4da16eaf72d38f0285168f45514e4f0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40b99c62203019626cffab96a1388a15" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "21b86e0dc1a1a40ea415c53ab3290ff5" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d8493994eed748bf1c1966d832eff628" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ecbffb89703eb7aea3237393e6f23445" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2cfdcf061496c0c6d493be8fdc64f949" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ff2eb453de6102d8f88f4ab32af0de4" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "62d74c592707e8e6cd3d66aa5a9f189d" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7227e1c8f613c7fcfe0a90df60d14bfc" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "676ee98fdefdd19fccfce02bb326dfc9" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "49146429e6da2b6f7784b6dd698a91f5" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0640b76678c115ce10dec062ef2a6fa0" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "82f495796b8d9c03eec81e588024fc87" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c38c70ffb42607f4ddb809c8077d015" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "02c65433dc13ad88ba4bb0c3a8e0178f" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0655f6c9097a3568709be390b456895c" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca8395dd8f5ff545b0e4a94c75e135ec" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "31ec8619f3a4f5281f6d328313a0a818" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dc01abb1d0107a3173da98f0e0b7aea3" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40f1dd341e9343663be3ad43a8195763" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f22184e6b6ecce9b6e4834268d7cd0f" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cbeec6b41c35554989c63bdf63dd2c94" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e6b183ea0b0368013d748a223d3980e" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df7421b29b4374eaffd2639f0dc8cdec" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1102da53396ce122cfb087ff82fb69a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd1c2123e46231b2672c38c3ff20b297" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "05315931ad28e2f2851dc46b7ba17762" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf4899d6f7aa15897fb5beafeeba7838" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e599d390bb239935097fe94b2a1fa44a" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "179a5b81024492859111af09335dcb21" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3d6e52af0fa62f76b8b9667eeb344b98" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "25fbddc2589ea773880ac503bfb397b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19fe2c4b4dab419da56f4cde9bbab67f" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "94873b11c64f06eff2b4dfab0c9ee022" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2c0b73de143a7265160e9fc16e4a8e1f" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "39a159f13536661e8dfe2021d73b4f1c" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "848f18e2903dbcd69711a4ca98194853" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cdc490e1bd084fef7104da6c1c21e757" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f12654262ad667db747a6f21f3d495a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad867e74db5ee25a3d9ba6a1f321cf0a" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f590ad153aa6c00df91ad06af3ea45f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f90e064bfb24e006f7e93308dc41d9cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b1a354b74d9d243d8ed1c993764dc9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe524c0729b79e1209e520c510c11f43" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bbbefd04eb98a1b5bfe08c11a0fb34e0" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3e1c53c20aa791d5eda1bfb66b30331" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "84438bb0693241db56d6c02b00f53ea7" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "634d230e3c7bb0a333170127a21b4718" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08a1cd3e5d24f2de4100b631d387e79b" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cc5d9d626e372015b34cb0c3a33f81c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8545c3e679b26ded4f870d8bbc2e8386" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1152cacff42bf2654949226094ffadb4" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4da9077bed768f2b1ae7289e8c98ee64" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "50cc76b3779afd035ada784e954aa81b" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5ed6ab31ba79e6b77be4d6e2bd8cc253" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2bb6a3d1a87ffea8bd57a5e35b4fb49b" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f7f55fbd442a07212af2a6e80d30d70" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df472c28af999282faffd47e803f1c93" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "90bd3281eda0993105c396a45539272b" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e45c166f49e61ad68e40a504d367273c" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf7f944a46a468b916b51b76482773fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5bdfca44b47ddf56b6c20e9e1716dad2" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c43dd2fd57a54108dc00f67ec2e498ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f0941f1e090def1c01f0de784ac14670" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d8946fd8b369b06d3cf7ed67e49c9533" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "adbbe425f01d94e3900c00e1bd126810" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4dd0df11d831e7c5f8b30d1f60e0c4d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16386495efbb5fd0b7044218a8b833a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f749c79ab9383e87b8a15f0def5ef531" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bb56691de944334b790a3774f5f59b48" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44382bdf57aa0e04a3942b40d75704e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6a0b7180eff364dbed0862580fbad90" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e77d472a18f7b1cb58708051b4f1c8fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab6a6de50593885e77435f52afb23ff0" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e9253d8865e46f851f8bd5cdd9afee81" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ad5e96d06a179ba71d607bc720851de" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7797cc47b94d5fa6c43244a8a6a585e" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2b72440cd6a812762a8e1de6e3447f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "43b019e7ff3bf1eb9bbe73362259631b" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9b5171df8a436f456a313c6fdf281a7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f7c97f7c57bfac34779db2acef9b272" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "18c004b0674f29278099b44e0ae980d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ea1f24701d39dcafb5b712eabc133355" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7529605a38d493f4bbb28fcd451d1bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "797939fe8ce9b7376966abca454b4f65" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "be56543783f12eedb4f08c727912db7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "30a3efbcd8fb5a228a6c3c510dedd336" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "434776258ce7f5ffdbf01a4f79e5ed93" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a633c128186a435d90b97a61b414da19" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "55ae6d47eab9d634b49ef8777f2b0811" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4ced40290bc187a01a677ce6f1f32a05" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aebeabdc04c01612a6beddeea49f5bf0" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5a3c8b66da1ade2b75157f12258de8cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0aa19b7ae62b586d20d675771cb3159f" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "76a2405f29edc4d175cd0a70eecc7359" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c364110c7891ac5f1ad934b4a4dca5e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c4effe1f99c15bc3e558324a55864ac2" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0ec1df8f07030da33949cea6e71367e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c4e1acafbbc88ff8ef30704dd7d02913" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa80aa0674ffba107caf35db890f23c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9622d57728546e39176a48c62259840b" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7301d46563bff2600452ab29d4d7c2d" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f32f04c8cd4e4e598e955f5a7141ef9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "733289838bec8f41d58a95d40ef1ae1f" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bdd31388297ac0de1ce923d2a2377b3e" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "500a5828acccd74faa9da79109363a33" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "077bae3e5e557beb8523234c92113f6a" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af9182cf301b81b095bcfc1f219771ca" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cfd06a50a3c16e508459bfabeb3821be" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "940c13b9129884b57b9db625917d8b77" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e40e44ecc5bc8abbd7e64054bc32c0e3" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19b8bd68c961ee6dca4d48ded6e0b946" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1c37c27a264fb7b793542cc79297e812" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf07181e4babe5844a2a8a4554eea9c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3b1453f3d97612c6ead627ec84b0ecb5" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af806c7d80f8626e17d6bff228c3f199" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f454c72529141c34ea962ecdc296e0f5" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d5d8b3945454e5aaf3081921643a8ecf" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "beab24d9146d718d09032150d982347b" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "653ba0b3f023d288d8eec4995f4aae1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "80b23147acb83e403d32ef6b11d502e3" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "178a2c2643c99024b3c233b20f5c327e" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d45bc79d093330dedc3f9651c1cdcfe2" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "17439b70fafd8188737d55dda4c6c35d" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 5120, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4dfd06ecb5202a7f03a30873012a7b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19273728, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 6144 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 12288 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18886656 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18892800 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18905088 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18911232 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18917376 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18923520 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18929664 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18935808 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18941952 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18948096 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18954240 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18960384 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18966528 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18972672 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18978816 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18984960 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18991104 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18997248 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19003392 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19009536 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19015680 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19027968 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19040256 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19046400 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19052544 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19058688 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19064832 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19070976 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19077120 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19083264 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19089408 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19095552 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19101696 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19107840 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19113984 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19120128 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19126272 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19132416 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19138560 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19144704 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19150848 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19156992 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19163136 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19169280 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19175424 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19181568 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19187712 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19193856 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19200000 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19206144 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19212288 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19218432 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19224576 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19230720 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19236864 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19243008 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19249152 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19255296 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19261440 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19267584 | |
| } | |
| ], | |
| "md5sum": "4d8aca5887bb60a891441d6b8e7c5c79" | |
| } | |
| ] | |
| } |