diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bc5f30d6632ac0efdc7be2e9095e9e9579af2e33 --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +--- +library_name: transformers +tags: [] +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + +This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bc84e852f1e3b051036b88b75f2c03a7bb1b5e81 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 29568, + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5211fb14b793a30efaa6d6f6e023cdd25c1de34 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.51.3" +} diff --git a/model-00001-of-00063.safetensors b/model-00001-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d06809d311dbdc6ac2e17f744e483c373ac99cd --- /dev/null +++ b/model-00001-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48350c6adebd44e4783ccebba23d2129e3b5451818c3a9caf15fd2dbf7921fd7 +size 4982833288 diff --git a/model-00002-of-00063.safetensors b/model-00002-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdbeef887be6f82abc7c9c884790986771cfee28 --- /dev/null +++ b/model-00002-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d96544a9665cabaf4c0b9f294b98302cdd5ec754b6db618be7bb8ab10d91ab +size 4114761856 diff --git a/model-00003-of-00063.safetensors b/model-00003-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9722ae85fc83c0d336d37eabcb7abf06b152194 --- /dev/null +++ b/model-00003-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54945cae50a3b494d52f1ae545b3564174550d78b56f03885be7381c420eca60 +size 4479624664 diff --git a/model-00004-of-00063.safetensors b/model-00004-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d85f373c35aeeb0005f0b57b4a839be0fadd2e0 --- /dev/null +++ b/model-00004-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f88e89763666145d5a72bbcdb210adce3f3a8eadca2fca5d25301c7b70b1a76 +size 4479624664 diff --git a/model-00005-of-00063.safetensors b/model-00005-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19998e4779c5409832990221c8e8ef9d094a5095 --- /dev/null +++ b/model-00005-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4efa141f6ec6a0c6a1b85e9e18224eea1b8a3def8fd9323658da0a35ba70d0f +size 4815276384 diff --git a/model-00006-of-00063.safetensors b/model-00006-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abcc5b783a53936b2b6a887890bfd26d1d0928e3 --- /dev/null +++ b/model-00006-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ecfc810ded3cae3b2218157ee7e0b36558c56e5c4e83bda88c0c5bfae8ab52 +size 4748060240 diff --git a/model-00007-of-00063.safetensors b/model-00007-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f21d1ab88c5594313e20e4ba35e19f124c9f36e --- /dev/null +++ b/model-00007-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770ccfe2f2075a58e9cf5a44829dd3bfd08a8411cde505ff95fa33a7d7828f3d +size 4479624664 diff --git a/model-00008-of-00063.safetensors b/model-00008-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..546aea125f3afe9cd767e2387ec2f32649fb53cf --- /dev/null +++ b/model-00008-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec6a026f2e2854dcd863b9ffec6f13b72e0d4a47802965b1badda473d3ac0da +size 4815276384 diff --git a/model-00009-of-00063.safetensors b/model-00009-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b15e201827f0bb1ab20ae60034d78ed248c67061 --- /dev/null +++ b/model-00009-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05fdc98d062724e6e3d7b94c02e2c0608800d35d84a8d10801547312f3bfa72 +size 4748060256 diff --git a/model-00010-of-00063.safetensors b/model-00010-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5015ef2d9664c1620370166e4004fa32f9159166 --- /dev/null +++ b/model-00010-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8ccc03018fb78b802f0b1d566d2f169f79f6334fdbd3a3970c699ef3ae5ce7 +size 4479624672 diff --git a/model-00011-of-00063.safetensors b/model-00011-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..570418b19ad927deae593eb408e5fc0aceaa51a6 --- /dev/null +++ b/model-00011-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524152ec2221d6fe1014f6c518dec3e370c5d20887f14652fb5e82ccc4dee637 +size 4815276400 diff --git a/model-00012-of-00063.safetensors b/model-00012-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..375ceaede427bda4419f03ed7b04f0562738a9ea --- /dev/null +++ b/model-00012-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ba9c6f2c1028cea096b7691376fc9a4a9f93bf121ecb0f93e7ecd94d45d4f20 +size 4748060248 diff --git a/model-00013-of-00063.safetensors b/model-00013-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d135810da644f7353b3f8ff038a4a16c33bf774 --- /dev/null +++ b/model-00013-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9c423f7b54440527455372a973866dc2fdab489be50e10da3d1ceaea99fbf5 +size 4479624672 diff --git a/model-00014-of-00063.safetensors b/model-00014-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3648a12c64c6546133e222d5f26497e6512e72ba --- /dev/null +++ b/model-00014-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4e1a42d71a2b86cb910beb7519c51648fdc53f90dd6809d297456517b4ba07 +size 4815276400 diff --git a/model-00015-of-00063.safetensors b/model-00015-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..844a9996ae19eff430f12445eb31c38dcbd362fa --- /dev/null +++ b/model-00015-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce4d245e0271300cdb25af0b86be25fab037eb4accf9d4884c06d39e3330819 +size 4748060248 diff --git a/model-00016-of-00063.safetensors b/model-00016-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2a609cb31f17b39d8c82be3839b8396e9ec0423 --- /dev/null +++ b/model-00016-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd63e4ecba64938c14c90f91826dd3ea9364f9c169b6a741930b1274e614568 +size 4479624672 diff --git a/model-00017-of-00063.safetensors b/model-00017-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7f9e7e84115789b5eafb9d60d3cca3c9812d260 --- /dev/null +++ b/model-00017-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68a8d3d8b5e07c47851e4ccca6c08595b31c9a63aaf652662fdc1370e5c428a +size 4815276400 diff --git a/model-00018-of-00063.safetensors b/model-00018-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c02ba1f5e807b970329e49c8370d4d5e0ea75027 --- /dev/null +++ b/model-00018-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc56bba741f0d941a5feb9ca264bd2c1b6733be76f602b1358c177f025e9b108 +size 4748060248 diff --git a/model-00019-of-00063.safetensors b/model-00019-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af85279ffb14f158c45a00e2da590ce6e117f240 --- /dev/null +++ b/model-00019-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002c8aea903a794d6c06d7478809d91148adc5844c2a4055bab46bd0470f0ec6 +size 4479624672 diff --git a/model-00020-of-00063.safetensors b/model-00020-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97c1cd8e5c42419a8a83b3b4d341407ca0e082af --- /dev/null +++ b/model-00020-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6cf4015c0a8c569d8a6d47f7e413d24de3eb61f4bacefc75e913956d8ac0e5 +size 4815276400 diff --git a/model-00021-of-00063.safetensors b/model-00021-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20a8d70ee3bcf4c2c56a779d0b8cbe4547ef5dc4 --- /dev/null +++ b/model-00021-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e6f217f22cd7e8715530e443e74bb642892962eef2a8c12d19b12d1977f1ea +size 4748060248 diff --git a/model-00022-of-00063.safetensors b/model-00022-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f8f800fef97e37628b2bb9d60df82083dbdcbab --- /dev/null +++ b/model-00022-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0c76382d02adf441ca0a2cc7a2a12c635508547fb5efaedf2375c04dab9cad +size 4479624672 diff --git a/model-00023-of-00063.safetensors b/model-00023-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cfbceab2aee669a8734a06231b984d5dace51cb --- /dev/null +++ b/model-00023-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11325f4a3e56231943ac6dc8e6f1e92057bf0b3bcf6c00620585a22ab6dc1c7 +size 4815276400 diff --git a/model-00024-of-00063.safetensors b/model-00024-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d929b816f70d1c8aead194f99506537ce03d192a --- /dev/null +++ b/model-00024-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f4181ef7faee24789c530424d02aa63bcfca642fb580b959b93f215ce417d5 +size 4748060248 diff --git a/model-00025-of-00063.safetensors b/model-00025-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36bf30acacefe1c38d4db0a12de6abe76c01f7d6 --- /dev/null +++ b/model-00025-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b961ff552713a8e0aba36acf9bceb49ce382bc8f76755d1925f51abac748c2fb +size 4479624672 diff --git a/model-00026-of-00063.safetensors b/model-00026-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae4fd4e241c8b5e96b3ce9fac6b88e254afb8878 --- /dev/null +++ b/model-00026-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260edecbaea3b02468b5c77ec56c83e1c053e7ecfc4cd9fd0a916480ec396111 +size 4815276400 diff --git a/model-00027-of-00063.safetensors b/model-00027-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d48d191dd26ca115f15939e937a4e0db794b620 --- /dev/null +++ b/model-00027-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f613f660ca635c8cd5390b6b741bfa7539088dbc4cda17cec22e108032dd32ab +size 4748060248 diff --git a/model-00028-of-00063.safetensors b/model-00028-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dddb73b261999a9538806d06f0bea2b2359cd2ad --- /dev/null +++ b/model-00028-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51260f508594fdcb3cb7fc356df493e94e54d5b664ea1a92f513231248a6db34 +size 4479624672 diff --git a/model-00029-of-00063.safetensors b/model-00029-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17fb1d4f532e5f194845d1d5d0c612124f9fa9f7 --- /dev/null +++ b/model-00029-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff0a0f7b7c6ef1dfdec46b466e7eb9146f12d2499a0f60f85aa94fd10e0d97c +size 4815276400 diff --git a/model-00030-of-00063.safetensors b/model-00030-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11ac689d33255c0793dd32d45e11b5ee439974de --- /dev/null +++ b/model-00030-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e48f57f168ea4b66cd15290c5dad3b1c3b4836fa170b2620ab5aa020f41914c +size 4748060248 diff --git a/model-00031-of-00063.safetensors b/model-00031-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b261677f95fa85290a1ba1c9e1e2f12985bd617e --- /dev/null +++ b/model-00031-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25e9c0adc10e59f7d86ddc0f4f53f168e63046263de54dcfc302669208af481 +size 4479624672 diff --git a/model-00032-of-00063.safetensors b/model-00032-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..659e42e3f749daf5de4e6fe9298967cfc1d7a689 --- /dev/null +++ b/model-00032-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72822ffa21034f85174a0cd36ef26cbc69027be20f6507fff1eb68583ec5477a +size 4815276400 diff --git a/model-00033-of-00063.safetensors b/model-00033-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2ae830fb6d0dc68dddb8679a3c645217a06a97c --- /dev/null +++ b/model-00033-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d646226896beec4cd138db7efec53de17439cf6bd5461b5f6d4ea2699e3f1e +size 4748060248 diff --git a/model-00034-of-00063.safetensors b/model-00034-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee837c46a61da36b302796141b349f48db2fd895 --- /dev/null +++ b/model-00034-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d2d9382534cafc4863ce44ba69590653cdab0884527d6c491dab6b8950217e +size 4479624672 diff --git a/model-00035-of-00063.safetensors b/model-00035-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eb65d393a03eb469a8a2e91e1ca8c6d8e70c8ac --- /dev/null +++ b/model-00035-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736c373c4e2faa14ad4c8603e2950584dfcc6ea4c450589c8e45ae9475199eab +size 4815276400 diff --git a/model-00036-of-00063.safetensors b/model-00036-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..229c861a596304406f9feceb20041bd2d511f306 --- /dev/null +++ b/model-00036-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917a8457ab3ea619a2709f11b2251b9b4b39b00a11c44e0eff320e9288a38907 +size 4748060248 diff --git a/model-00037-of-00063.safetensors b/model-00037-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08dd8c0b51e561a9ee0c8ee4a21c045f924533d4 --- /dev/null +++ b/model-00037-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0cc92e5a8f3f9a57921c6322bebdfa17e6ca6aaac586da064648e60eb41739 +size 4479624672 diff --git a/model-00038-of-00063.safetensors b/model-00038-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d0fb61d9e9a4dc3c0ac1ab6c5bbaa5176aa3364 --- /dev/null +++ b/model-00038-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84713069e6e347d93d23f370c4fe0905eb428c85240008278003991c651314c0 +size 4815276400 diff --git a/model-00039-of-00063.safetensors b/model-00039-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e75345502e0f400a1b7acdb35af1e45bb44f4ef --- /dev/null +++ b/model-00039-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f354b1410be16e6da6047019dec590f7838ad42c441722145942bedd961ffa37 +size 4748060248 diff --git a/model-00040-of-00063.safetensors b/model-00040-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f1739568b6ef8279969eddfefe4cc4335e1c5ee --- /dev/null +++ b/model-00040-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7018c7ec7f789ff6a20b94e9d5142864c41d4905249fa00fb9683be695437bd1 +size 4479624672 diff --git a/model-00041-of-00063.safetensors b/model-00041-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aa969eee3b2c955c20d38d20e2dae5cf28269da --- /dev/null +++ b/model-00041-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd85375de6cc0bb1adb2bf0a462fe738900ac0e8439962799ab3b73a63c5c3b +size 4815276400 diff --git a/model-00042-of-00063.safetensors b/model-00042-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d42f245c20f415ae2d0d0599ee56221084c0ac0 --- /dev/null +++ b/model-00042-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b3706b7e9647dbb815bcabdb0a0b498d907623a4ed31f2876e515b23450f1c +size 4748060248 diff --git a/model-00043-of-00063.safetensors b/model-00043-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2ce7a4b6eba509412fcd59d4a4df6c897d671fc --- /dev/null +++ b/model-00043-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943cbddb5e125ec07e3c5ce5cff8fb3932bf47cca16baab129f36cb078f34ced +size 4479624672 diff --git a/model-00044-of-00063.safetensors b/model-00044-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6d81d02faa5d188a698acc805f3eb86d261b1bd --- /dev/null +++ b/model-00044-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea17dbc06a59e48a9018b0780ffbf7480e3a5e16069f039d4635f286249ea38 +size 4815276400 diff --git a/model-00045-of-00063.safetensors b/model-00045-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7d1e60edd9f981713f1e3da4dfe7278869d6dc6 --- /dev/null +++ b/model-00045-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe953f6b47d7e343bf258747e4029c6dcf6ef327c15b4c7d971936130badd9d1 +size 4748060248 diff --git a/model-00046-of-00063.safetensors b/model-00046-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c82f985821427335c27218d937ffba73860ad26f --- /dev/null +++ b/model-00046-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388eb7f50b8c9843f4f7030e2f6346957277850f3d1955c97c1836a19f48f5d1 +size 4479624672 diff --git a/model-00047-of-00063.safetensors b/model-00047-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23c0533724ee5721ad6c70880f1c089854b72b2b --- /dev/null +++ b/model-00047-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b55a7df14d38436328b6f7d47d0836d2730883855c1f63510262176d1837c5 +size 4815276400 diff --git a/model-00048-of-00063.safetensors b/model-00048-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd2ef0e79b59ebcb12e77fee20ccb245f3d9fa0c --- /dev/null +++ b/model-00048-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12db590dfb023ed223b4f0ead7dd0165ca1187a7eaf9ba1b0957467682c12f3b +size 4748060248 diff --git a/model-00049-of-00063.safetensors b/model-00049-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17208c62a821457240f746376fe892499890aebf --- /dev/null +++ b/model-00049-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932ab6f3b0665084b4a49d646291bc21cc030738a854a957a11c4d7ce37602a9 +size 4479624672 diff --git a/model-00050-of-00063.safetensors b/model-00050-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70d84a95054581bc664d790472cd2d810e294b34 --- /dev/null +++ b/model-00050-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd47edc2ccefadbada58f5fa6b97d03714c7444819c2050f81501ae39016ef9c +size 4815276400 diff --git a/model-00051-of-00063.safetensors b/model-00051-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62ef1c733f2a9df044f5b5b93a5e49e719c7390b --- /dev/null +++ b/model-00051-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f6c669202015b43378359192624ea9c0aa9ee5f760d02d2cb4bd86a103501e +size 4748060248 diff --git a/model-00052-of-00063.safetensors b/model-00052-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11978d424dc952854e225aa7f929f3f69382813c --- /dev/null +++ b/model-00052-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d922ecb02bacad5d1dc074b6689ad462bb51006cdb925bc56e05c6c976ba34cb +size 4479624672 diff --git a/model-00053-of-00063.safetensors b/model-00053-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0baa498f909e805a3348466305184c23df9716a1 --- /dev/null +++ b/model-00053-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e5434fe2990f09ea116877299184ceeb347207281cf6a8b537d7aa62122983 +size 4815276400 diff --git a/model-00054-of-00063.safetensors b/model-00054-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6827353f87df8f67db1bc988d5cd66e570bff3b8 --- /dev/null +++ b/model-00054-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7bc7fae159ae8a07c896646d90d4e04ea10977420648522e76e19742b623915 +size 4748060248 diff --git a/model-00055-of-00063.safetensors b/model-00055-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25a1dfad488d44b2523c5e018d8a93e947d1cad2 --- /dev/null +++ b/model-00055-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f384514aaeed3b2c062a31b81fe7a048823d40b925a69638714b8edea1e73a1 +size 4479624672 diff --git a/model-00056-of-00063.safetensors b/model-00056-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8b15ff8f498fe889a51b6ec65055f9756984222 --- /dev/null +++ b/model-00056-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5000c753a106abe976b3471bcf38efbad86760b129e2b1ae35a67e175e5c42db +size 4815276400 diff --git a/model-00057-of-00063.safetensors b/model-00057-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90b181a6ca20420268478a0d798ab7920903846d --- /dev/null +++ b/model-00057-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e7fc41b4f1cfd5ff591e122ad142e5440e4b152a678ba81ff049cee4eee579 +size 4748060248 diff --git a/model-00058-of-00063.safetensors b/model-00058-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2441cc08f4c80ddb877c7dfac15e91f6ca5aa305 --- /dev/null +++ b/model-00058-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4254389268c6af29ee8c4c98a489403f84ad7d572dca901e15bd8a429dd9d6 +size 4479624672 diff --git a/model-00059-of-00063.safetensors b/model-00059-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7c06bb3c3f87c833db0f069c95cb35864d9fcbd --- /dev/null +++ b/model-00059-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2985f64ffcefd08ce2900d4c625fc21337b86bb4d7c51b09f7a1300fc53a4aae +size 4815276400 diff --git a/model-00060-of-00063.safetensors b/model-00060-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..183c4dbaa6b62cc39f2b58e6ac62ea90ca7b685f --- /dev/null +++ b/model-00060-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c77eab5c2d092a626d4b954bd33e5066450f045ae71192edd4fc0d5ae2abd3b +size 4748060248 diff --git a/model-00061-of-00063.safetensors b/model-00061-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4b5bbe9c04f9c68595505e68e7061a95177a168 --- /dev/null +++ b/model-00061-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f868f9f4d7f73a6d2d3533df7338594f3f1c8ea96fbebb5a374e3eb62723a504 +size 4479624672 diff --git a/model-00062-of-00063.safetensors b/model-00062-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7602c52a97cb1ea214cad0d49919777192fedbb --- /dev/null +++ b/model-00062-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d940cea3661a2ac0389e2d64678b7632c5ce3830bec543750f6230f0bb05ddf0 +size 968982984 diff --git a/model-00063-of-00063.safetensors b/model-00063-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f3a62e67a53abf6cf346310213abd5314ce4222 --- /dev/null +++ b/model-00063-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13fa5fd724a2bb169caf9251e812f8c2fb767b0dbe644d0b7876b47c1646d77 +size 4982833280 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..0d94ccf36cd25fbeb196bf9da00cb9a0debc6ef3 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,970 @@ +{ + "metadata": { + "total_size": 290824814592 + }, + "weight_map": { + "lm_head.weight": "model-00063-of-00063.safetensors", + "model.embed_tokens.weight": "model-00001-of-00063.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.11.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.14.input_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.15.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.16.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.input_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.18.input_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.19.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.20.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.input_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.22.input_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.23.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.24.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.input_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.26.input_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.27.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.input_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.30.input_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.31.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.34.input_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.35.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.39.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.43.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.44.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.47.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.48.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.input_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.50.input_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.51.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.52.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.input_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.54.input_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.55.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.56.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.input_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.58.input_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.59.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.60.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.input_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.62.input_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.63.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.64.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.input_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.66.input_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.67.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.68.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.input_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.70.input_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.71.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.72.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.input_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.74.input_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.75.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.76.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.input_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.78.input_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.79.input_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00008-of-00063.safetensors", + "model.norm.weight": "model-00062-of-00063.safetensors" + } +}