diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8545 @@ +{ + "metadata": { + "ParamSize": 563, + "ParamBytes": 145412407296.0, + "BitsPerParam": 14.781631589720977 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "6822b8621a6af7062c219ba13b241400" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9e23967d16a2978826cf5a1cf69eec9f" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8149157c63c1db21f997b5a04526e26a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "e05db141ea5914763186bc15b5455830" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7cc2234aceba3bd45473edb7d2f316d7" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "013c411de70caa7ceb022810e3522b69" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2c4951bd8179f1d96c47bd54d5016f28" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b4617cb42e3a9dccc19ebc251e9dc3b7" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5a46715a227b454e18dabfa9fae2f72c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "be71adbf7c3c907ad2d9a9c86c71bdab" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6e086698a4139fa70be145ccddba7065" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0285ebe40ee1fd3ccbef36a9856ac6fc" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "13dcff2398f6088874c177725ca52a9e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "9fc86d8ef1ce3f63868b3c22c0eca6c7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "dbcbb7288507b49d0dd716d72f266922" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c5c752dec3029739c7b2b3217aa609d8" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7729b1ec8b830c2767cce14db2953ea3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "258f73c756e446bfa0d36ecb801c767b" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1e4af2b2bc94de53df12bbe8066879f5" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a13565b6c93de30dda8df11f47a27265" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a4de50155e8f7c42c070a408299fb19d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "73313f5f1aacfd56f539fa2798f80156" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "19863186f03a3cd30ec5c173d65b5129" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ddb284fdf672137478820cf4a3d7dbcb" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7c545dd4d7566f4e6dce371fe02a61bb" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b217983ec61d5e256f2ce75c3e419496" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "febc5f52ec3b9a5eb52661d63d3f52c0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cb4abc4743d75417094437a3365e08bc" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "71e383a31367874b7c4729f2a18a5750" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "48d52c93b8d862d91f72149205a6a053" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c841bc12a38f43f659d09a7c0e56604c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "abe6598aa4a493f24a379affa81894cd" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "82970cc6f1f73238463300090974abab" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5653ea5b39d98d4836c5f37f9793ed53" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9ab679b8f4570094d511c917651bc82e" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "feb978983e842634b79873e66910120a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8b931c14bd6687a520a975a0f2aced24" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "330ab1bb2a0bdaa0f035f07a6a3f70de" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a637951d208a41140c999164232cb41f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e0ec884a6ee000bb1ccd2513b758628f" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "95458625256e5260464270bdd0790914" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1eac8e8af9bce791e2e7d56ace23feb9" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e209d0ceff53f072978bea265fae406e" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "6d3920122f80ddf9ee28982e5a207d89" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "99a5073864dd89b86a0ae8af0cfeb052" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "34c1acb6b07b66cc3beee63ae99beb67" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c0bf0387874e68129d35077079d78000" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0da171d3a167e36ccb527982259e4102" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a9184d050a86f87b155c0a468d5bb403" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e3ccd8f5a1690127097a9d02dc6389f9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8651d3393aeb1e725b56d8a60fa4e901" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c5e58db93ceb83a82ec132378e61ac99" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7cdb9b3facae230c91192ef4000d4cc9" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "51951a5375e93ac9bf6c302ca129a2a5" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "15d9156913aa0d9b2e68d0ccefa89ae6" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f4afec19dcb5512d7ccf52231ad36ebf" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "057766f81fbf2f9bc5b08fae2451b56e" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "23ae0b805fb3796e733f83ee90125233" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "9e2dac300b54989db7496fde321d23ef" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "86697abea67d22e6fdab76c2984505b7" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "fbe92686c0c0a512e94b3d05fc8f550b" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5ab7aacfeff1dde1175b3681fb7e76c9" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "33e931042814cac513f62c11326fd964" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "567f000944d8042c16ad65fe5d5e9daa" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "de87021b059be16a4b5bd2fdd744c2a4" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "cb284db9037969f74347b7e885e84acd" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a7a21fca36277402c1fdefffa020aee0" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f628bf26b65f6cae4c3ec9e966bc824b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "92d2599af6ce34137aaa709e153791fd" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "273809e1079c317aff28cd06af944c8e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3d2f52996dfdd6ac3084e33117997b49" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7e3cc75812050735b9cc28db411e0fe4" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "12ca1d8dbea4c84dadd45324d149c5fa" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8e80154959ff956d89b53ed52795657f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f8f11e09422c62e587f6928f615ade43" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5684a75efa3cd08438eba60849111951" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "98133ddfc44f675d0cca267662ce2961" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "26a0cc03c7f0af315a8eb0d62a33fa89" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "adf164db9b1d830a44536237ae7bd08c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d81a4d68364d4b937dbdc70d32ecb0b6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f716a1565ce7f8ec3e582ae59d5c61b1" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "04a09206ab2bfa53bfeb7ec9cc54bcf2" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "90c9b0429a55838d560724ce0a017599" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e66a476da99649f5af13178b816b5dc5" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2b6fe5e56a4b921d0f9f8c3e10fa5d5b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c2b1f31d34010ed8b3087e12f82931e7" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "90d933b72c73570de4da18f65b962155" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "5a8b917afcbd8efe97272dacb537f9e6" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "4c3b9c1fe7487474cb108db3b2c01b6d" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3b1650261d9968b8c1978c44660ca712" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1ef59b44472342b33ec970048e382f7a" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2d805c1ed1f769d316ce8f8645cd869c" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d5f0ac863e5d68edb555dea71710fed6" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e2a8c7d2ab800f870c81f671ba4a25b5" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "44f42ebc2b511dc2ff1744051b70aafe" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1497f62d492461fb1941efaaa97cfe80" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "01aea945f93051a85dfb2c95e2317101" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3e21bfdc5970e907e680dcfd04d4ddf0" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "2eb0252fa430bf5a602247283f22c795" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9ff0d03f859e1873d709d9c9b2acea69" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "bf0edb51400072eac3effee8b266a73c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "654b8352123af9ff601f07198139da8d" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7221865ba4098e230c9b88271be27821" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0a19460c6c0f4eef0bd243e9a752cafe" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c2709ccc1beb3d9d6d000950eaf2d7d3" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "53cd8bd00f71926674d579ef544fe68f" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b0dd818c9fdb2adc96eb02536163d798" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "24970b95e1a9129c96b41aa46da30a2c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c03713f1afe310fb18a3b0830d36a867" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3b9e837afc89624d38ab7dd69049bac4" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f11b5472bd02d20f9708e64c8bebf959" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7b98f0cfcb1f0d15f353a053275f57e4" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c5211904369cdd0e372f5b3fcee42e87" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "dff32756fd25d37ac1c3e3f7637e2aaf" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "60f8c6654a66f0b1b393803d00dbe499" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "735a39d57fed3e197934f2f69d9f6c1e" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "67b51c53759b3e1327e9ca9f6a2d11b3" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e64c498ce1ddf860831b3aff2e2a540b" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b66fc4626d62eb803f07bb0a255988a6" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "0b835c506a0b6d9d30285eb6e0e77db6" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a398a05ca6e410c87e0ab2566b39160b" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "17ce110ac422aae5a9e323ae56e38225" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f99303c397a7e058d421ca86ddfe9a6f" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "debe4442c68c7a1b122ebc37d0a1bfc9" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "039b38ee6d1155bd746236edaf647628" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ada6aa7c34d2eacae12f0510413e5f74" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8f7ae029ae8755add63e1eaf5aebd7ea" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "cddda8827f23864ccce705dcaf8a095d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "cda02fd86a5288c066892c32abf6e57e" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0921cd28d76f090c76dc33f1c869c31d" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ea88b4690a8aa6eca5393f0f8e2973e" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "aec505172a2f6d542e3463c329877ba2" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "d05cda0987efa92bad77ac335e6a5a68" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "df37da6408541822df29856f4742107a" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e1af6cfca4d7862b2c121746626dd2d7" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9437d686cbce5bcf20204a297e52cd33" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e8873457a698f15b69b8546214cafbc4" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "403e010b95639b370f39f2015411a17d" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "fe0759b141c22fe036358328a90419a4" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9417b84eb5b732bbbeea225ba64d857f" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "83c60ab3514e7040869cad477b453865" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b1442db2dab4e4bd06bc2e86fc4ffbee" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f45a618a81a1d8dde6b40ea17995ba2c" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "888e1d256ce746904b8d7efcbc66f7d6" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "df3c8c4f2f04d11db745d634b03d0e39" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "878eb12ada758b248f29031a8b21283e" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9dc71c8537a3f5739808c9040fea15c7" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "89e29896d0fb35d0875698dd533b343b" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "725536f67c308782855d78c4f65173fb" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9db2c76b072496b21d92da9436ddf948" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9e5bc2597232858a09df542de0e92318" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "57bde2061b302ff70cf7483239948b97" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e319fbded4f1257433e0f2c20d103952" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ebf743a9fb5f0a1c368b42068973c523" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "bf0944533907023bcb98b0e29569ba4a" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d3eafc3a8606f6e10ad950c6d7c32cd2" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "58ad22d2d70ac84bc4635c8771c8f47b" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2cd848b9cb59b6d64f1456af046cd027" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "031b6af23c051ef20716724de8a716d5" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6bacaf291916a48e0ba91c40f2259461" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b6905240ad12ca1cdb1302dec7f49745" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f24664fddfb78002286e93a1dcc89242" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b18b1202ec7e295676f8eefd9fd2919b" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8dc250f12e412e24282be53e023a4ae4" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f8989771d471fbef436935d265f97946" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "daf392448dcc0e8e074eb54d066cc60d" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9b9a2695da16e824b873df0d7194b42f" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "08b603d10904659d21c9489c583d7f43" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "81fd1ea8a1e079453d29b7f95d4eca60" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f0ee7ee1aa7782bb2821ff08f80706d7" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "43b342599dd03a5049fe75d300246774" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "edf4dade27e8f3c5fd941c3e87611e9b" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "91f58b34bc35f9e68bce5c3c38719d05" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2c56a6edc968d6a9419ae0da16f17441" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c41b93c59f167c6b518ec0a6a1242b54" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "79072589ec066d748298d2a952e50f08" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0c1bb8276dfbc536cc82f029176b6a23" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cdc5a7eada85cb3b7d55f28d47b4a983" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "6c89ac1b2a7f57a9f04bc18c861bfa84" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6b2ec4d8ea9c287522ad3e4c07794ef2" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "fadbdae9688ce58b73a387c8aafec683" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "fffda01cd9ff2c5b509e740a099b2a9a" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c960ca13c2d788cbec90a2d7578c5708" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8f5510e6b5fe4da83d2e70ccdcde366e" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3945759eff833f540be9cb7977197bfa" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b0eb02cdc72299259209add5ef7ebe96" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7d041147bf3392ce792748df52db43d8" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3b7968fbf19812b60f5ff52062678015" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "6503b1cb784659a50112011efc20a521" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "466e4ac8f2d5e3848df120e640ab0781" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1b7802904da9a6ca741ba7c8362a1ceb" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1996fc62a7a4277fe052f9c14a9f0b53" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "64ac421b9d84896d205e3b0bfa01c929" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "237f6b6efa43df411bc4f7eeb2c366f9" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "46ef8d1577c4175110b492bfcf7a5f69" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "970ccda9e521fe4bc4cf818dbadd00e9" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e09615369963f8f22580c852cee04766" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "de7349965e04813b03d36f3a144d503a" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "99639e936c083d2aa695c89e309b7aaa" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "23f5ccbfb23a53296b81d3264994e4d2" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "cbd92cd60d950d5f01696f661a4b0e6b" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "81abcf5d0a08baa58b7e6d1636f76a61" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "88cdfa01780d77b2b9a84c0bbbf59acf" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a13aed90bc39571cd0e6c1bfe5d999ea" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8a091a8c8d8295691c45da55e457ec5a" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "223254b1345b0102bd836821c019f03f" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8707e471e819401ed66c7f0887aeaa20" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "972dcf76e6f7e22d9aa3da9113aa4655" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9f6704009f71fe0ed48009f129a68778" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a1131113345b399e55a92678d0da3dcc" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "991c623cc6b723c9c35222abdeb6e28b" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a5b11a1333e0069bb51a607c704aa915" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "310b90295c76dedbe5659a71d9fa84c0" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "47569ee780eed3dd3528747d9039ff97" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "5c7aef6d4ae40cb1082006eb4d8244d3" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "502f9b803a5cf8228129354476050f97" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f171ab2fc6296f06d22ea27c8b32079b" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d5c46e45d1953e986482ad1cb4a4436e" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "25aa273a816881bbe21c22ff522fa9c1" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f237fb431512551a634c08b7003bb767" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9380c99bbb8acc77a2f2202ad82265b7" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e5975dffdcd0d2b1ae756920d8ff0166" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1b4aca525607a3bee4d75f86bf959b48" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b58b7a5b0467d452f9f553f3165717f5" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bec5fc6f3d343debc2e09dd3eb31539a" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4a90b1b41dd0b1b8ed26e66a2de9dd43" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "061aada9a554f2d6a91dc346eaa89750" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b4441b68854e356390d221c90763afed" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8b19620441eed22865f48372b162e6b7" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "445e74e28fc9c0d98c654f14e9ae4b4a" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e97c73a815c69e830a9c4e5638c8f9aa" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f05fdd1ba6dd6c0c63166c7c98250983" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6c1573202717126bb41a03766dc169c6" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "80128d5d8ee3d9cebbc22d40ab98d69d" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e70965186ded2d9a86bfd9505714e71a" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0d2ee4f084a6c89824b81e9d45a33d27" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ca1e88e7618618bc67b513c3614582dd" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ed9790ef82baf4994d4b236dc88459c4" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b48d7e07a78b1a97bd52e96ecb9ad44e" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "434c7c5d8bb8b1a23120fa72b84a157a" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f36f799014ef8d00bb386b68629cbe12" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "676978de8498e16fbabde26824cdd9a9" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "242bcc4397c9b7c356e136a10fb3673c" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1b62fec2544c1d644da8ced15fc0d8b8" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a3335912323bc978d2dafb570b5d8a3e" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7261b56b8b04d53b05f22551ccfb5f49" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "7b8cb70444311fbf2504e8a05d67c266" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "da69e1cfaba0ce5c0382572496996a0d" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1d16a488b5692e9c074ffc829f7061bd" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1b378e440ef1f4f3b42ef46ce7486575" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c2d2ab8c3466cf38227fa97d9ef28315" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "79065b31ea8564f55f3dc6a1628302c6" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7d000576e74be6fcc0810cfe3eaf5cb0" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ad9dc67db837a77286ca8bcb6a2e5527" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "041273e3d856bcc9002831d4b094d917" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "751e7ac139ab044d4349cb8b2be159ed" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "477a0ea6f7bac46853fe8253d54da4c4" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "510562dcc63a05dd4bce17aea94eb42a" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "98b8f6320c003a22eb2cb07786af3e31" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c1c1e8657914df71d451f858aed79514" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "51f5043bc124d531767640d81465dbc7" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c497d21d0233390de5fdb1a51c4815ba" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cbe6f914a57f5d42e786128c57f6117f" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a9fc80009468fb21f73181ed7765b634" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "0c55c1feb25b61abac2f02f80366a383" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b21a120ab460f1a15bc9f022b1ba03fa" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ce7163e5f68c810e4df781150f1deca9" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2fec128260f80c68a009a75fa006e868" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "17d884791d7b1e5de86218ea4b413ad4" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c0728a96302992d7bf3305e3e202cbd6" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bae0d755392fce75da9b95fa50a435ba" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "534f298a6d37bc5855f266b957d5782a" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "52cc4b50b3527d7e64ab7603015369cf" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a602f39a4e4963224f44c5c34e9403af" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "77903b25d6a3cb5d6d0b60e76217beb7" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "21dd05f4e04a600d9d13216e048dc4d1" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d910116612d6cb57fe417d8f8c4c7fff" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c3c1820c4b711b74336b9c001b03357e" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b70f9dd58570cae8f39e331a07d64bda" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "86bf47765cf9848b6b56cd5b26db2a2a" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "0aa40406a685095150a9e2dd038cb030" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5226dc6184f51b479451cbbdf13f29e3" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c20715c64e7115b340c45aeed357484b" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2b4546dfc3d25715fdfa991c6adde4a2" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b4245de5fe24220d506c2b8e55272e40" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "302357cdfdd8cc366bbee02533813d8b" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ac2919faee3e33717477afe3c541a02" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3f1569e896a997230f19a9368ac3f09f" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6b29ca9476ab09767876f0eb36329630" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6ff015951be03868f181e88c01fbfd76" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1a38c9255768414eb94f656dda0b85c3" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "fb56e0108cded410861f0f928fe52170" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5a6f95d84f61810c479923f37ba7cfab" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c490315f66571bad9b6cea588e6706c1" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "22863a104130cf3e2777949cafb89ba9" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "98c68c758b89e41c7b3fcec63b021055" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8ebb873f7679e17ff8a69b5138ab5940" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "54441e2b621389dfe21c6e69fd495b6d" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6b71cd3fb3c860d2e30ed85b75596809" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0751c4ac092732b2d21f3d8933c8a7b2" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "fdfcea70725f3059fff7185c9e0bbf82" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f3f0ad7d6b11e747e050bd519182e9f1" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3bd4f3d983d875d1c7fd81f15d2b0c92" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "da0b7ceda4fb1b5886061bbd70db1564" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8654ae84e06cf173d9e2bbcf91d82779" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3a0dd471d26b838d53e4f5112f403ca1" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "95f3d8f881de6d4ed00173676fefbdea" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "21acb56416db2a26d1b7483be159db43" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3e24c8780126a2e3c35a37646bc1a1b5" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d90849a00879a7a59c9819a23537659d" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1ff42e6cb2ebecc147785a76053f2fa0" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "16e66969c817ba2b3eb791644c72dc80" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "45ef76ec00dcb4af246e286b13b1930b" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "79c69486c89f9b9749fc2b9a34e48df6" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "65cc14eef03f6b0353254380220c4b76" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "42a7dbe5a2a4ab5c70c00a88358ac9a5" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "fb27f83f815474a75c2c26e55eb9fc94" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4889bd6645f7cf4a391040eb050be8bd" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5e4b535caf9fe532028ee790fce959ed" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a7368ac05706762c675913e57e5d85b9" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2c726001635acdf0da3a55c1a0b7b147" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9142f64b3b8c27b5166718c825444dbf" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 4276224, + "records": [ + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16384 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32768 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 49152 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 69632 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 86016 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 102400 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 118784 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 135168 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 155648 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 172032 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 188416 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 208896 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 225280 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 241664 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 262144 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 278528 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 294912 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 315392 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 331776 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 348160 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 364544 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 380928 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 401408 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 417792 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 434176 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 454656 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 475136 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 491520 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 507904 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 524288 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 540672 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 561152 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 581632 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 598016 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 614400 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 630784 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 647168 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 667648 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 688128 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 704512 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 720896 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 737280 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 753664 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 774144 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 790528 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 806912 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 827392 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 843776 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 860160 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 880640 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 897024 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 913408 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 933888 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 954368 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 970752 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 987136 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1003520 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1019904 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1040384 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1060864 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1077248 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1093632 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1110016 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1126400 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1146880 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1167360 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1183744 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1200128 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1216512 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1232896 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1253376 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1269760 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1286144 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1306624 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1323008 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1339392 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1359872 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1376256 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1392640 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1413120 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1433600 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1449984 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1466368 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1486848 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1503232 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1519616 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1540096 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1560576 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1576960 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1593344 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1609728 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1626112 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1646592 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1667072 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1683456 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1699840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1716224 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1732608 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1753088 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1773568 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1789952 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1806336 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1822720 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1839104 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1859584 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1875968 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1892352 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1912832 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1929216 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1945600 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1966080 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1982464 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1998848 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2019328 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2039808 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2056192 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2072576 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2088960 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2105344 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2125824 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2146304 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2162688 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2179072 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2195456 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2211840 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2232320 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2252800 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2269184 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2285568 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2301952 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2318336 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2338816 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2355200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2371584 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2392064 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2408448 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2424832 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2445312 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2461696 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2478080 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2498560 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2519040 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2535424 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2551808 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2568192 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2584576 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2605056 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2625536 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2641920 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2658304 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2674688 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2691072 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2711552 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2732032 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2748416 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2764800 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2781184 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2797568 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2818048 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2838528 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2854912 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2871296 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2887680 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2904064 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2924544 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2940928 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2957312 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2977792 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2994176 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3010560 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3031040 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3047424 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3063808 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3084288 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3104768 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3121152 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3137536 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3153920 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3170304 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3190784 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3211264 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3227648 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3244032 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3260416 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3276800 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3297280 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3317760 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3334144 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3350528 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3366912 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3383296 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3403776 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3420160 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3436544 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3457024 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3473408 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3489792 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3510272 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3526656 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3543040 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3563520 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3584000 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3600384 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3616768 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3633152 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3649536 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3670016 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3690496 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3706880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3723264 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3739648 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3756032 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3776512 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3796992 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3813376 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3829760 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3846144 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3862528 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3883008 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3903488 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3919872 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3936256 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3952640 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3969024 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3989504 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4005888 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4022272 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4042752 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4059136 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4075520 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4096000 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4112384 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4128768 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4149248 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4169728 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4186112 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4202496 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4218880 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4235264 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4255744 + } + ], + "md5sum": "de0187bc14b08046224bf0746aac9217" + } + ] +} \ No newline at end of file