diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bc5f30d6632ac0efdc7be2e9095e9e9579af2e33
--- /dev/null
+++ b/README.md
@@ -0,0 +1,199 @@
+---
+library_name: transformers
+tags: []
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+This is the model card of a 🤗 transformers model that has been pushed to the Hub. This model card has been automatically generated.
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly. -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0fab4610b5e8560a8172a972d1dbbd448ae7216f
--- /dev/null
+++ b/config.json
@@ -0,0 +1,80 @@
+{
+  "architectures": [
+    "GptOssForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "eos_token_id": 200002,
+  "experts_per_token": 4,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2880,
+  "initial_context_length": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 2880,
+  "layer_types": [
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 131072,
+  "model_type": "gpt_oss",
+  "num_attention_heads": 64,
+  "num_experts_per_tok": 4,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "num_local_experts": 128,
+  "output_router_logits": false,
+  "pad_token_id": 199999,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "beta_fast": 32.0,
+    "beta_slow": 1.0,
+    "factor": 32.0,
+    "original_max_position_embeddings": 4096,
+    "rope_type": "yarn",
+    "truncate": false
+  },
+  "rope_theta": 150000,
+  "router_aux_loss_coef": 0.9,
+  "sliding_window": 128,
+  "swiglu_limit": 7.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "vocab_size": 201088
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f1e86865b5fa51c4b0e46c2042000a8f1f24672b
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,11 @@
+{
+  "bos_token_id": 199998,
+  "do_sample": true,
+  "eos_token_id": [
+    200002,
+    199999,
+    200012
+  ],
+  "pad_token_id": 199999,
+  "transformers_version": "4.55.4"
+}
diff --git a/model-00001-of-00073.safetensors b/model-00001-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..77ddd8b5692d287c025d8a69fa480a87ce1e5ce7
--- /dev/null
+++ b/model-00001-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:252c725875058ced4142c6d73e7df658357da7298eb975f9389a441c14162aab
+size 1212106064
diff --git a/model-00002-of-00073.safetensors b/model-00002-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ffc0cdeabd7728fd42611ee0398fa4b6378779da
--- /dev/null
+++ b/model-00002-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66f8d893fc4fbd27a07d2454114d5cc84821fa18d7575b625aa184b31ef0a572
+size 4248207640
diff --git a/model-00003-of-00073.safetensors b/model-00003-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..440cb618f18c68040b31672419dee5d270188677
--- /dev/null
+++ b/model-00003-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:616d24c201548aa129e9a6b2e2f0c58020b1eb9f9206520ae3d76c90e1a9276e
+size 2177954736
diff --git a/model-00004-of-00073.safetensors b/model-00004-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..83f6cf600c5f0c8b103e33df6643999950836fb8
--- /dev/null
+++ b/model-00004-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dda6a71a050e68fe98ddfc12c0e3e86234b29d2ee5a6ba9083cabcc9e173563f
+size 4248207640
diff --git a/model-00005-of-00073.safetensors b/model-00005-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cfa7ea4e45e5d638eae2ac621a37393158933ac2
--- /dev/null
+++ b/model-00005-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d17ed54b1810bd4c124cd6c1b1b8d034565840ab97133e675a7a639da7f42cf
+size 2177954736
diff --git a/model-00006-of-00073.safetensors b/model-00006-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4826abbdd222efc4dd73a42a68a3767db186b184
--- /dev/null
+++ b/model-00006-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01b81738668bce7b58ec8cef5e8c25b1e2b7afafcd1af7913da2788838263bd0
+size 4248207640
diff --git a/model-00007-of-00073.safetensors b/model-00007-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..55c464b2a164cfd91212d30daab10a7f620cf54a
--- /dev/null
+++ b/model-00007-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a603f5f3cc6a2b67fad59c42523c6bf05e9cb00a497dc0fd1a149b3a66c276c9
+size 2177954736
diff --git a/model-00008-of-00073.safetensors b/model-00008-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2420db70e716e40cceab702078683d15ae2a9c77
--- /dev/null
+++ b/model-00008-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b43d83c403e92882c79cdfbeb8b80e30d0d7fe58329ab8d3a8a13db0c1d7c5d
+size 4248207640
diff --git a/model-00009-of-00073.safetensors b/model-00009-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9bbb647265e7df34240801e993ea27553c617e6
--- /dev/null
+++ b/model-00009-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c62fa5fa69abb52e2b3b6742e8bb40552bf29f9897aeed4be8cfb3fc46c2e03
+size 2177954736
diff --git a/model-00010-of-00073.safetensors b/model-00010-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e60d1990f00f619edf4f6cd8d8c70146e442bea3
--- /dev/null
+++ b/model-00010-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc516fd1e058767eb5d566b81f2619fc9319aed76409628d94b9fda5e4994a0
+size 4248207640
diff --git a/model-00011-of-00073.safetensors b/model-00011-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..57b011a491c8072c0cd186491c951251cf966559
--- /dev/null
+++ b/model-00011-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9617aa735c69865370f29dea48fda3856459d688c99ee13b86a1a59bbf5002a1
+size 2177954736
diff --git a/model-00012-of-00073.safetensors b/model-00012-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..972054b5252c2a5dfab20c4eac58bd0309ac6975
--- /dev/null
+++ b/model-00012-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39b2a45ac61f5b5d44e5a2df4b88643f8c7d57545b06c60121d954a9598599c1
+size 4248207640
diff --git a/model-00013-of-00073.safetensors b/model-00013-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..65a9f17ef058a92d13f50f2b8edc07a784031e3e
--- /dev/null
+++ b/model-00013-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c41bc6d8f44f20387f8ba68e1d40c185914fd9188d41f7370fd0ffc3658fde87
+size 2177954736
diff --git a/model-00014-of-00073.safetensors b/model-00014-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8fb665708d218c8c30bf3b87cbf7e0911c014c5
--- /dev/null
+++ b/model-00014-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a4ba78d4b340e08bcdb3fbfb48c26cc57abd581046b6a2a6620b9c63cca14aa
+size 4248207640
diff --git a/model-00015-of-00073.safetensors b/model-00015-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..07ef24f1a90f342e19d3a89071e12f9af72602ca
--- /dev/null
+++ b/model-00015-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c834f5d1c35da0136559ddb092969a0ace205292b468f5c8a7a17de4c0c31e26
+size 2177954736
diff --git a/model-00016-of-00073.safetensors b/model-00016-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b231d9ea3cbea1a55578d11a9586d7536ac7c0f3
--- /dev/null
+++ b/model-00016-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:925cd470b083f8ce05484d991c989419873eecf562080947d4c64649ab56677d
+size 4248207640
diff --git a/model-00017-of-00073.safetensors b/model-00017-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3dec2c246b08278d07721a78c61df118b3043757
--- /dev/null
+++ b/model-00017-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2465c19da8e1bf32a1daa76f33e9a627f55ba5f970a2612fcb061edf0a7373ba
+size 2177954736
diff --git a/model-00018-of-00073.safetensors b/model-00018-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..284f05e627e9326343342712706cf4d4af78b817
--- /dev/null
+++ b/model-00018-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f52b5fe061cefc784b2fb5a09ce6f30fe1fa6641103a141a5b8722df2a48aff8
+size 4248207640
diff --git a/model-00019-of-00073.safetensors b/model-00019-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..334d423595f40235e132444f21cd26f5d36cdcf5
--- /dev/null
+++ b/model-00019-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64a8525d17b2856642ffccbaad96a24f187908566e20a08cf638a9684524ff43
+size 2177954736
diff --git a/model-00020-of-00073.safetensors b/model-00020-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0ca9839d90ea7c40c1c76454e1ede34b8f6b8480
--- /dev/null
+++ b/model-00020-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3484899ce81bacff6cf71f3e225152d61b3f688906bdaca527c87c5eb35d7c1f
+size 4248207640
diff --git a/model-00021-of-00073.safetensors b/model-00021-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c509acf42cf93928d64b127b598e1b7e42c98b2c
--- /dev/null
+++ b/model-00021-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a21097d3437dab4af6d3e97934299231571b9c029d9c222bd48c89203d183f3
+size 2177954688
diff --git a/model-00022-of-00073.safetensors b/model-00022-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1046e178507ebf5cc9a623a26d45ffb1b6254d5f
--- /dev/null
+++ b/model-00022-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d18647e59b8982622412fa33d9c8defbac9441670fedeb94bf216382f904e48
+size 4248207640
diff --git a/model-00023-of-00073.safetensors b/model-00023-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..525c2f47ba0fe23885e8f7be6fce2e2235c5d8ae
--- /dev/null
+++ b/model-00023-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:189fb0c93908fbab9d3a2263c0fbf411fc5c34c6996abc5dc09ab2ef6441ec8a
+size 2177954752
diff --git a/model-00024-of-00073.safetensors b/model-00024-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cf61bb785574a6ebca04b516a646503ebc931f8c
--- /dev/null
+++ b/model-00024-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f75eeb2761915a28fd12ae35d2fa13b6b984d9c6829184dc6a81bb778d1811c2
+size 4248207640
diff --git a/model-00025-of-00073.safetensors b/model-00025-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ab675ef834bbdc900b2e21217babca069fbec79a
--- /dev/null
+++ b/model-00025-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a13ea1ee2f9b4bda3e859b6bd0a2dee3b9cbba22d8e265c73695e2702b7cae2
+size 2177954752
diff --git a/model-00026-of-00073.safetensors b/model-00026-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c0878898b2bbee01fece50654f41f0c4ce2ebba0
--- /dev/null
+++ b/model-00026-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78d69f06e29de3eeb3e8df6353d418cfca5a6abb432f90b95c750252e5d3b7e1
+size 4248207640
diff --git a/model-00027-of-00073.safetensors b/model-00027-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2ed923a35ee797b373d4b38b3451e54309ebf594
--- /dev/null
+++ b/model-00027-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59009695846bab1177519de9675a407f89de775311b751f9bd4095434c1bc8f2
+size 2177954752
diff --git a/model-00028-of-00073.safetensors b/model-00028-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..15e444843fc91d96ca903cb006caf5d99846de73
--- /dev/null
+++ b/model-00028-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2301da5402b74812725f99d892af657a421d7d3ff1fedf3309c28aa0fa0e4353
+size 4248207640
diff --git a/model-00029-of-00073.safetensors b/model-00029-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..51d431b2535c32e8817a8b1e8b96481c7c9ca351
--- /dev/null
+++ b/model-00029-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c47a3203982fad66d9f6ab6796fc77b6b21618300b0e2f6bf2a66a286997b2fe
+size 2177954752
diff --git a/model-00030-of-00073.safetensors b/model-00030-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..934dcb5bfa155c571fe3ab8cdbabffce45ca1824
--- /dev/null
+++ b/model-00030-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad8c1f5337c4b372da5d49cfeca7996e85cf3f8af0323359abfc5a38539db4d1
+size 4248207640
diff --git a/model-00031-of-00073.safetensors b/model-00031-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bec842087305b16e52ce84b0645a651fee3ee821
--- /dev/null
+++ b/model-00031-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85ca72202a5feb02440691e695b876f5e9e65c3c006d6afe9296af5463e01cb1
+size 2177954752
diff --git a/model-00032-of-00073.safetensors b/model-00032-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3cc323a0b5cf83aeb072e6aeee414ddbb742a8e3
--- /dev/null
+++ b/model-00032-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a9dc8df6da9507d6f99547ed7b6ab17793cbcd5a81c0c2f3439ff8f6c1d10c9
+size 4248207640
diff --git a/model-00033-of-00073.safetensors b/model-00033-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b5d8ec8c5524aa550e123b54f08e4c7d38d42e1
--- /dev/null
+++ b/model-00033-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7386c57beb266ce71d68b7253cef5c313d192d55323e5d30423f23f11460d05
+size 2177954752
diff --git a/model-00034-of-00073.safetensors b/model-00034-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..088d89c3ce46312b5fb168b42c9b138975af0fdd
--- /dev/null
+++ b/model-00034-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2278f08494e6ef08a3e4b5794fc650970b01bb6652502c3bf861d9ee39024d52
+size 4248207640
diff --git a/model-00035-of-00073.safetensors b/model-00035-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3c65c2e233d804b4143d71c92107949eb6e0f7f5
--- /dev/null
+++ b/model-00035-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15739b598461d846f936c2a6075f974e01e222bf411bc0e53e088bbe93e0b104
+size 2177954752
diff --git a/model-00036-of-00073.safetensors b/model-00036-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a8e95409219a417441b9a780689e1c0c90bf0d3c
--- /dev/null
+++ b/model-00036-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d28428c507c8b4e042c656f2db6c30fc9db7dd6f6c0157dcda930c2a3ea4d11c
+size 4248207640
diff --git a/model-00037-of-00073.safetensors b/model-00037-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..08b552c06ad28a3b5e7cd37c8b1b9d4e6067cef4
--- /dev/null
+++ b/model-00037-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ec0956fcc77e688fbe5ea6e546d0c5038d72ed1a2deb9f6e46e329b68e710e5
+size 2177954752
diff --git a/model-00038-of-00073.safetensors b/model-00038-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..849397898f8f12af6c3f25eb4035ca339cbf60ca
--- /dev/null
+++ b/model-00038-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9704e3b5209e51b7310530e1da1e31b4aafb290a21b1957990b59ee98c08312f
+size 4248207640
diff --git a/model-00039-of-00073.safetensors b/model-00039-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d120dd72f034bfeb02ce364f37bdbb12dfa763c2
--- /dev/null
+++ b/model-00039-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cfa2f49ec8728ff8c1b5221c76d26d6362c559ad3277545dc645c382a6f3952
+size 2177954752
diff --git a/model-00040-of-00073.safetensors b/model-00040-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..87db2650c968dc085ff0badeaee1c644084e9842
--- /dev/null
+++ b/model-00040-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63132593ac1bd4883db66340d38ef61685065e53db5ac40276bbfc1d83fb8bb8
+size 4248207640
diff --git a/model-00041-of-00073.safetensors b/model-00041-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e65ca79b0f793f3de9a7f4532c81fc23fb9752cb
--- /dev/null
+++ b/model-00041-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b83f482b8e161a9549bfaed8ed33855ba1a7fabeb27f24a14a086002018ae6f9
+size 2177954752
diff --git a/model-00042-of-00073.safetensors b/model-00042-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e077fbf39e8f59fb092cad7e9beb0f11a35d88a2
--- /dev/null
+++ b/model-00042-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e39c7084ce12ee967b6dd15c8e1849fa550d50fd8e7d123064c3b3b5a233118a
+size 4248207640
diff --git a/model-00043-of-00073.safetensors b/model-00043-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97c15e6ef75d6c1dedb5f9cd73e546fbd7693b42
--- /dev/null
+++ b/model-00043-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d185adfc71a17ae91600415a2efd0db8e1c69c191346580474e6c3f167b99048
+size 2177954752
diff --git a/model-00044-of-00073.safetensors b/model-00044-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dff09063452d8a0763e6ca7f2332d30c1df12eff
--- /dev/null
+++ b/model-00044-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a3cfc8f6772d9ec8ccbbd1213069d0b8213d1f7bf4658011ac9fcb12504e941
+size 4248207640
diff --git a/model-00045-of-00073.safetensors b/model-00045-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6108a9ccccabb878336bfa8cbcf4997fb97fb49c
--- /dev/null
+++ b/model-00045-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa18e7560c99140049dbf610385e76a844c6bc28933bbdfc9e7abfed3d5090f2
+size 2177954752
diff --git a/model-00046-of-00073.safetensors b/model-00046-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f594f984ea46eff3d7723c171a02dd4f2378589
--- /dev/null
+++ b/model-00046-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1939048181675b836a1f11862169988e54ecf788d4b74421f664052d565c07a
+size 4248207640
diff --git a/model-00047-of-00073.safetensors b/model-00047-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1c54938da594de135299907e7430643424ce9197
--- /dev/null
+++ b/model-00047-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f09a7f36b36546ff97bce5e7ac901fd73179976c7ff34263c39ec931645a0588
+size 2177954752
diff --git a/model-00048-of-00073.safetensors b/model-00048-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0683c5739356157326426e735f6e45b0ac3999c6
--- /dev/null
+++ b/model-00048-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b5ff6161f070bca5bf419c49123a327e601f1526c1256c0f9151d3e1f53b4fa
+size 4248207640
diff --git a/model-00049-of-00073.safetensors b/model-00049-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..044b323dc509b24195b1699bed8ba8d7a5a0c1ed
--- /dev/null
+++ b/model-00049-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:476354b9803bbc6f357d3144ad9d9c9e96c0dc7f3fbcaacfc1caac9441a194c8
+size 2177954752
diff --git a/model-00050-of-00073.safetensors b/model-00050-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ec92fb99018bb46f07247e1d0cb26ae87e64ccce
--- /dev/null
+++ b/model-00050-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65373fb48bed15e6db585aec180e53098ef5d59368265be9aa54da55f9ae0e05
+size 4248207640
diff --git a/model-00051-of-00073.safetensors b/model-00051-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d9b82039ee17b41c32ff83bfea976d8e344424e6
--- /dev/null
+++ b/model-00051-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a71f4a99d958a9fc7a716c9e106dac8f448db6ee14b8d7e0e57eae0122ea00e
+size 2177954752
diff --git a/model-00052-of-00073.safetensors b/model-00052-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..19291c6fd6b13eefbf7b721a2bf67b482efd69c6
--- /dev/null
+++ b/model-00052-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa3fbc51fb6592d3a61b1769c06c2a922fbe396432626191e2b036de3e507d80
+size 4248207640
diff --git a/model-00053-of-00073.safetensors b/model-00053-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..324c5f25a0eca92b9ef1532fe8b8c620c5c21ef0
--- /dev/null
+++ b/model-00053-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6a621d75fb95d4509f54593b5509a08d6912febbfeece456c54fdaeef9376d1
+size 2177954752
diff --git a/model-00054-of-00073.safetensors b/model-00054-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d93e6623a9cb4e3de6bf754dc568c3d43496a8c0
--- /dev/null
+++ b/model-00054-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0531a970dd6c5d8e7cbae562255f6a3a9436546c311a29602e0fcc0fd8a84e3
+size 4248207640
diff --git a/model-00055-of-00073.safetensors b/model-00055-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c3e8273ed4da7873b6da9e38e827611742a49d99
--- /dev/null
+++ b/model-00055-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d314fff0d023b1877c87f801756b7352f67d360223743a378cd4a2cadbbf66dd
+size 2177954752
diff --git a/model-00056-of-00073.safetensors b/model-00056-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1e7113d9e7d57947433e98a77c58c13231e34c68
--- /dev/null
+++ b/model-00056-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b05fa63d01ec993405b2926e45798aea2b185200c2f008aeeaea68d0a6945c98
+size 4248207640
diff --git a/model-00057-of-00073.safetensors b/model-00057-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c50d4b89ee202c6b87696c39a926acdd31f7a27c
--- /dev/null
+++ b/model-00057-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c46bf0a556a4554259e476a913f0b5bdcd9ba2e262f070e2d896a74d41e1360c
+size 2177954752
diff --git a/model-00058-of-00073.safetensors b/model-00058-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bad9865380e13fc1ab55d7135f68a43edfa0b21
--- /dev/null
+++ b/model-00058-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b8641d3b84630260b190480647bc640fd122dbe24a6dd465b6812a17ba73989
+size 4248207640
diff --git a/model-00059-of-00073.safetensors b/model-00059-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42381cc83815a0a5bf12e3370035e87cb6d16d7c
--- /dev/null
+++ b/model-00059-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d31e1ac5a774b4889ed1361b1b6d4b091872984395ce7c766d49579c9222f11
+size 2177954752
diff --git a/model-00060-of-00073.safetensors b/model-00060-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff2ead735964c12ba27e21fd67e94edcc02baedc
--- /dev/null
+++ b/model-00060-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e2351f41533aec995d9780f0c6a4b47279ab381e02709b46fe618487503602a
+size 4248207640
diff --git a/model-00061-of-00073.safetensors b/model-00061-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49d1f38217e6a43cbb994a7a8180382b9a192440
--- /dev/null
+++ b/model-00061-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92418a51634879c2d40134d135669065f5eec1256d97c89a76b839eef2a8c07d
+size 2177954752
diff --git a/model-00062-of-00073.safetensors b/model-00062-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c2d3cf4ccf45a0fc5ac5dae3961e74f5c2493d3d
--- /dev/null
+++ b/model-00062-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21973e03b03ac1b40821ecfe3e5c184ad04e6ef527e0b0a44a2f1879414f3ef
+size 4248207640
diff --git a/model-00063-of-00073.safetensors b/model-00063-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f5a556c29cf5eae92fa7bb053210b3a28cdcca1e
--- /dev/null
+++ b/model-00063-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c6a3b7e49f2fa2ecd9c9345d5a493fc1ef4ebce6647a52d49aa7fddf529cc6f
+size 2177954752
diff --git a/model-00064-of-00073.safetensors b/model-00064-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bd945a93777070b19bc0f2d1d125ad656effafc
--- /dev/null
+++ b/model-00064-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7435c449bd45bf5a50e92bc22767fb6114afaaa5f3d12f0948649ff3eaad2353
+size 4248207640
diff --git a/model-00065-of-00073.safetensors b/model-00065-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e21fd4edb089f9fe3ad4cc2f4a55ac9a53221b75
--- /dev/null
+++ b/model-00065-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e9d23e0d62c92ba2bd4a34846b2a33ce9a830f09e8fd84fd2250dbbfa2df9de
+size 2177954752
diff --git a/model-00066-of-00073.safetensors b/model-00066-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..db0d65e36e3aff15a2a3698ca42bc5af09df6b40
--- /dev/null
+++ b/model-00066-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bbcd434c09d76339b3985c613662226dae9548395066eed1fbde54062e74932
+size 4248207640
diff --git a/model-00067-of-00073.safetensors b/model-00067-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16120f3df21f91dc8881985de974cc6b44caea4d
--- /dev/null
+++ b/model-00067-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f43a3644008aa50f06e080fad393fb36f66557b92bb8f792cdb40073f594723
+size 2177954752
diff --git a/model-00068-of-00073.safetensors b/model-00068-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d580dfb8ebe5560b1f300502947c2dd298896949
--- /dev/null
+++ b/model-00068-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1f91f706f2bb4eb1d88b80208fafd72879fb5501bedc2fa87f85c4e2e6aa0ee
+size 4248207640
diff --git a/model-00069-of-00073.safetensors b/model-00069-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..62d6518fa2a702d21c29a4bf04e1153e106cc8fa
--- /dev/null
+++ b/model-00069-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b52f158e7b07457d7010ee6aca6621abfe008d0b76654956b45058bb0bdaa746
+size 2177954752
diff --git a/model-00070-of-00073.safetensors b/model-00070-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..12f4adbd05ea78a9774d4fb7818c600e720e62e5
--- /dev/null
+++ b/model-00070-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:964669a60753219bb8f516f22d2985b609743436f665afa55dc9c95c40f0dd06
+size 4248207640
diff --git a/model-00071-of-00073.safetensors b/model-00071-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5217ab608c8514744b35323a4ff3f7381a106df6
--- /dev/null
+++ b/model-00071-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f83eef64d21bc83cc72982a87a50ac16954b18bb7cf8d9be16c908ff7680432e
+size 2177954752
diff --git a/model-00072-of-00073.safetensors b/model-00072-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6379f048d14c4ed96435e6ce872ae048608ac57e
--- /dev/null
+++ b/model-00072-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38d5e2198840177789319fc6ac995273dfe0b6b72325e40f641669206f49d8f3
+size 4248207640
diff --git a/model-00073-of-00073.safetensors b/model-00073-of-00073.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eaa45d8f37c71b0136457d89a39751cf452389ce
--- /dev/null
+++ b/model-00073-of-00073.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e493d8c2d647686efbe51251c6c2bf7473dc98090b99bbe194eacfac3302c675
+size 3282388536
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..ac32376906551efb22fd6405ca90ba5c5c4e90ec
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,623 @@
+{
+  "metadata": {
+    "total_parameters": 116829156672,
+    "total_size": 233658313344
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00073-of-00073.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00073.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00003-of-00073.safetensors",
+    "model.layers.0.mlp.experts.down_proj": "model-00003-of-00073.safetensors",
+    "model.layers.0.mlp.experts.down_proj_bias": "model-00003-of-00073.safetensors",
+    "model.layers.0.mlp.experts.gate_up_proj": "model-00002-of-00073.safetensors",
+    "model.layers.0.mlp.experts.gate_up_proj_bias": "model-00002-of-00073.safetensors",
+    "model.layers.0.mlp.router.bias": "model-00001-of-00073.safetensors",
+    "model.layers.0.mlp.router.weight": "model-00001-of-00073.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00073.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.o_proj.bias": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.sinks": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00073.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00073.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00005-of-00073.safetensors",
+    "model.layers.1.mlp.experts.down_proj": "model-00005-of-00073.safetensors",
+    "model.layers.1.mlp.experts.down_proj_bias": "model-00005-of-00073.safetensors",
+    "model.layers.1.mlp.experts.gate_up_proj": "model-00004-of-00073.safetensors",
+    "model.layers.1.mlp.experts.gate_up_proj_bias": "model-00004-of-00073.safetensors",
+    "model.layers.1.mlp.router.bias": "model-00003-of-00073.safetensors",
+    "model.layers.1.mlp.router.weight": "model-00003-of-00073.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00005-of-00073.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.o_proj.bias": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.sinks": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00003-of-00073.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00073.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00023-of-00073.safetensors",
+    "model.layers.10.mlp.experts.down_proj": "model-00023-of-00073.safetensors",
+    "model.layers.10.mlp.experts.down_proj_bias": "model-00023-of-00073.safetensors",
+    "model.layers.10.mlp.experts.gate_up_proj": "model-00022-of-00073.safetensors",
+    "model.layers.10.mlp.experts.gate_up_proj_bias": "model-00022-of-00073.safetensors",
+    "model.layers.10.mlp.router.bias": "model-00021-of-00073.safetensors",
+    "model.layers.10.mlp.router.weight": "model-00021-of-00073.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00023-of-00073.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.o_proj.bias": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.sinks": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00021-of-00073.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00021-of-00073.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00025-of-00073.safetensors",
+    "model.layers.11.mlp.experts.down_proj": "model-00025-of-00073.safetensors",
+    "model.layers.11.mlp.experts.down_proj_bias": "model-00025-of-00073.safetensors",
+    "model.layers.11.mlp.experts.gate_up_proj": "model-00024-of-00073.safetensors",
+    "model.layers.11.mlp.experts.gate_up_proj_bias": "model-00024-of-00073.safetensors",
+    "model.layers.11.mlp.router.bias": "model-00023-of-00073.safetensors",
+    "model.layers.11.mlp.router.weight": "model-00023-of-00073.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00025-of-00073.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.o_proj.bias": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.sinks": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00023-of-00073.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00023-of-00073.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00027-of-00073.safetensors",
+    "model.layers.12.mlp.experts.down_proj": "model-00027-of-00073.safetensors",
+    "model.layers.12.mlp.experts.down_proj_bias": "model-00027-of-00073.safetensors",
+    "model.layers.12.mlp.experts.gate_up_proj": "model-00026-of-00073.safetensors",
+    "model.layers.12.mlp.experts.gate_up_proj_bias": "model-00026-of-00073.safetensors",
+    "model.layers.12.mlp.router.bias": "model-00025-of-00073.safetensors",
+    "model.layers.12.mlp.router.weight": "model-00025-of-00073.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00027-of-00073.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.o_proj.bias": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.sinks": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00025-of-00073.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00025-of-00073.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00029-of-00073.safetensors",
+    "model.layers.13.mlp.experts.down_proj": "model-00029-of-00073.safetensors",
+    "model.layers.13.mlp.experts.down_proj_bias": "model-00029-of-00073.safetensors",
+    "model.layers.13.mlp.experts.gate_up_proj": "model-00028-of-00073.safetensors",
+    "model.layers.13.mlp.experts.gate_up_proj_bias": "model-00028-of-00073.safetensors",
+    "model.layers.13.mlp.router.bias": "model-00027-of-00073.safetensors",
+    "model.layers.13.mlp.router.weight": "model-00027-of-00073.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00029-of-00073.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.o_proj.bias": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.sinks": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00027-of-00073.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00027-of-00073.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00031-of-00073.safetensors",
+    "model.layers.14.mlp.experts.down_proj": "model-00031-of-00073.safetensors",
+    "model.layers.14.mlp.experts.down_proj_bias": "model-00031-of-00073.safetensors",
+    "model.layers.14.mlp.experts.gate_up_proj": "model-00030-of-00073.safetensors",
+    "model.layers.14.mlp.experts.gate_up_proj_bias": "model-00030-of-00073.safetensors",
+    "model.layers.14.mlp.router.bias": "model-00029-of-00073.safetensors",
+    "model.layers.14.mlp.router.weight": "model-00029-of-00073.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00031-of-00073.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.o_proj.bias": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.sinks": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00029-of-00073.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00029-of-00073.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00033-of-00073.safetensors",
+    "model.layers.15.mlp.experts.down_proj": "model-00033-of-00073.safetensors",
+    "model.layers.15.mlp.experts.down_proj_bias": "model-00033-of-00073.safetensors",
+    "model.layers.15.mlp.experts.gate_up_proj": "model-00032-of-00073.safetensors",
+    "model.layers.15.mlp.experts.gate_up_proj_bias": "model-00032-of-00073.safetensors",
+    "model.layers.15.mlp.router.bias": "model-00031-of-00073.safetensors",
+    "model.layers.15.mlp.router.weight": "model-00031-of-00073.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00033-of-00073.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.o_proj.bias": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.sinks": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00031-of-00073.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00031-of-00073.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00035-of-00073.safetensors",
+    "model.layers.16.mlp.experts.down_proj": "model-00035-of-00073.safetensors",
+    "model.layers.16.mlp.experts.down_proj_bias": "model-00035-of-00073.safetensors",
+    "model.layers.16.mlp.experts.gate_up_proj": "model-00034-of-00073.safetensors",
+    "model.layers.16.mlp.experts.gate_up_proj_bias": "model-00034-of-00073.safetensors",
+    "model.layers.16.mlp.router.bias": "model-00033-of-00073.safetensors",
+    "model.layers.16.mlp.router.weight": "model-00033-of-00073.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00035-of-00073.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.o_proj.bias": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.sinks": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00033-of-00073.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00033-of-00073.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00037-of-00073.safetensors",
+    "model.layers.17.mlp.experts.down_proj": "model-00037-of-00073.safetensors",
+    "model.layers.17.mlp.experts.down_proj_bias": "model-00037-of-00073.safetensors",
+    "model.layers.17.mlp.experts.gate_up_proj": "model-00036-of-00073.safetensors",
+    "model.layers.17.mlp.experts.gate_up_proj_bias": "model-00036-of-00073.safetensors",
+    "model.layers.17.mlp.router.bias": "model-00035-of-00073.safetensors",
+    "model.layers.17.mlp.router.weight": "model-00035-of-00073.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00037-of-00073.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.o_proj.bias": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.sinks": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00035-of-00073.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00035-of-00073.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00039-of-00073.safetensors",
+    "model.layers.18.mlp.experts.down_proj": "model-00039-of-00073.safetensors",
+    "model.layers.18.mlp.experts.down_proj_bias": "model-00039-of-00073.safetensors",
+    "model.layers.18.mlp.experts.gate_up_proj": "model-00038-of-00073.safetensors",
+    "model.layers.18.mlp.experts.gate_up_proj_bias": "model-00038-of-00073.safetensors",
+    "model.layers.18.mlp.router.bias": "model-00037-of-00073.safetensors",
+    "model.layers.18.mlp.router.weight": "model-00037-of-00073.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00039-of-00073.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.o_proj.bias": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.sinks": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00037-of-00073.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00037-of-00073.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00041-of-00073.safetensors",
+    "model.layers.19.mlp.experts.down_proj": "model-00041-of-00073.safetensors",
+    "model.layers.19.mlp.experts.down_proj_bias": "model-00041-of-00073.safetensors",
+    "model.layers.19.mlp.experts.gate_up_proj": "model-00040-of-00073.safetensors",
+    "model.layers.19.mlp.experts.gate_up_proj_bias": "model-00040-of-00073.safetensors",
+    "model.layers.19.mlp.router.bias": "model-00039-of-00073.safetensors",
+    "model.layers.19.mlp.router.weight": "model-00039-of-00073.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00041-of-00073.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.o_proj.bias": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.sinks": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00039-of-00073.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00039-of-00073.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00007-of-00073.safetensors",
+    "model.layers.2.mlp.experts.down_proj": "model-00007-of-00073.safetensors",
+    "model.layers.2.mlp.experts.down_proj_bias": "model-00007-of-00073.safetensors",
+    "model.layers.2.mlp.experts.gate_up_proj": "model-00006-of-00073.safetensors",
+    "model.layers.2.mlp.experts.gate_up_proj_bias": "model-00006-of-00073.safetensors",
+    "model.layers.2.mlp.router.bias": "model-00005-of-00073.safetensors",
+    "model.layers.2.mlp.router.weight": "model-00005-of-00073.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00007-of-00073.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.o_proj.bias": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.sinks": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00005-of-00073.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00005-of-00073.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00043-of-00073.safetensors",
+    "model.layers.20.mlp.experts.down_proj": "model-00043-of-00073.safetensors",
+    "model.layers.20.mlp.experts.down_proj_bias": "model-00043-of-00073.safetensors",
+    "model.layers.20.mlp.experts.gate_up_proj": "model-00042-of-00073.safetensors",
+    "model.layers.20.mlp.experts.gate_up_proj_bias": "model-00042-of-00073.safetensors",
+    "model.layers.20.mlp.router.bias": "model-00041-of-00073.safetensors",
+    "model.layers.20.mlp.router.weight": "model-00041-of-00073.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00043-of-00073.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.o_proj.bias": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.sinks": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00041-of-00073.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00041-of-00073.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00045-of-00073.safetensors",
+    "model.layers.21.mlp.experts.down_proj": "model-00045-of-00073.safetensors",
+    "model.layers.21.mlp.experts.down_proj_bias": "model-00045-of-00073.safetensors",
+    "model.layers.21.mlp.experts.gate_up_proj": "model-00044-of-00073.safetensors",
+    "model.layers.21.mlp.experts.gate_up_proj_bias": "model-00044-of-00073.safetensors",
+    "model.layers.21.mlp.router.bias": "model-00043-of-00073.safetensors",
+    "model.layers.21.mlp.router.weight": "model-00043-of-00073.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00045-of-00073.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.o_proj.bias": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.sinks": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00043-of-00073.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00043-of-00073.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00047-of-00073.safetensors",
+    "model.layers.22.mlp.experts.down_proj": "model-00047-of-00073.safetensors",
+    "model.layers.22.mlp.experts.down_proj_bias": "model-00047-of-00073.safetensors",
+    "model.layers.22.mlp.experts.gate_up_proj": "model-00046-of-00073.safetensors",
+    "model.layers.22.mlp.experts.gate_up_proj_bias": "model-00046-of-00073.safetensors",
+    "model.layers.22.mlp.router.bias": "model-00045-of-00073.safetensors",
+    "model.layers.22.mlp.router.weight": "model-00045-of-00073.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00047-of-00073.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.o_proj.bias": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.sinks": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00045-of-00073.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00045-of-00073.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00049-of-00073.safetensors",
+    "model.layers.23.mlp.experts.down_proj": "model-00049-of-00073.safetensors",
+    "model.layers.23.mlp.experts.down_proj_bias": "model-00049-of-00073.safetensors",
+    "model.layers.23.mlp.experts.gate_up_proj": "model-00048-of-00073.safetensors",
+    "model.layers.23.mlp.experts.gate_up_proj_bias": "model-00048-of-00073.safetensors",
+    "model.layers.23.mlp.router.bias": "model-00047-of-00073.safetensors",
+    "model.layers.23.mlp.router.weight": "model-00047-of-00073.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00049-of-00073.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.o_proj.bias": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.sinks": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00047-of-00073.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00047-of-00073.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00051-of-00073.safetensors",
+    "model.layers.24.mlp.experts.down_proj": "model-00051-of-00073.safetensors",
+    "model.layers.24.mlp.experts.down_proj_bias": "model-00051-of-00073.safetensors",
+    "model.layers.24.mlp.experts.gate_up_proj": "model-00050-of-00073.safetensors",
+    "model.layers.24.mlp.experts.gate_up_proj_bias": "model-00050-of-00073.safetensors",
+    "model.layers.24.mlp.router.bias": "model-00049-of-00073.safetensors",
+    "model.layers.24.mlp.router.weight": "model-00049-of-00073.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00051-of-00073.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.o_proj.bias": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.sinks": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00049-of-00073.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00049-of-00073.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00053-of-00073.safetensors",
+    "model.layers.25.mlp.experts.down_proj": "model-00053-of-00073.safetensors",
+    "model.layers.25.mlp.experts.down_proj_bias": "model-00053-of-00073.safetensors",
+    "model.layers.25.mlp.experts.gate_up_proj": "model-00052-of-00073.safetensors",
+    "model.layers.25.mlp.experts.gate_up_proj_bias": "model-00052-of-00073.safetensors",
+    "model.layers.25.mlp.router.bias": "model-00051-of-00073.safetensors",
+    "model.layers.25.mlp.router.weight": "model-00051-of-00073.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00053-of-00073.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.o_proj.bias": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.sinks": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00051-of-00073.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00051-of-00073.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00055-of-00073.safetensors",
+    "model.layers.26.mlp.experts.down_proj": "model-00055-of-00073.safetensors",
+    "model.layers.26.mlp.experts.down_proj_bias": "model-00055-of-00073.safetensors",
+    "model.layers.26.mlp.experts.gate_up_proj": "model-00054-of-00073.safetensors",
+    "model.layers.26.mlp.experts.gate_up_proj_bias": "model-00054-of-00073.safetensors",
+    "model.layers.26.mlp.router.bias": "model-00053-of-00073.safetensors",
+    "model.layers.26.mlp.router.weight": "model-00053-of-00073.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00055-of-00073.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.o_proj.bias": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.sinks": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00053-of-00073.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00053-of-00073.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00057-of-00073.safetensors",
+    "model.layers.27.mlp.experts.down_proj": "model-00057-of-00073.safetensors",
+    "model.layers.27.mlp.experts.down_proj_bias": "model-00057-of-00073.safetensors",
+    "model.layers.27.mlp.experts.gate_up_proj": "model-00056-of-00073.safetensors",
+    "model.layers.27.mlp.experts.gate_up_proj_bias": "model-00056-of-00073.safetensors",
+    "model.layers.27.mlp.router.bias": "model-00055-of-00073.safetensors",
+    "model.layers.27.mlp.router.weight": "model-00055-of-00073.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00057-of-00073.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.o_proj.bias": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.sinks": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00055-of-00073.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00055-of-00073.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00059-of-00073.safetensors",
+    "model.layers.28.mlp.experts.down_proj": "model-00059-of-00073.safetensors",
+    "model.layers.28.mlp.experts.down_proj_bias": "model-00059-of-00073.safetensors",
+    "model.layers.28.mlp.experts.gate_up_proj": "model-00058-of-00073.safetensors",
+    "model.layers.28.mlp.experts.gate_up_proj_bias": "model-00058-of-00073.safetensors",
+    "model.layers.28.mlp.router.bias": "model-00057-of-00073.safetensors",
+    "model.layers.28.mlp.router.weight": "model-00057-of-00073.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00059-of-00073.safetensors",
+    "model.layers.28.self_attn.k_proj.bias": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.o_proj.bias": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.q_proj.bias": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.sinks": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.v_proj.bias": "model-00057-of-00073.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00057-of-00073.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00061-of-00073.safetensors",
+    "model.layers.29.mlp.experts.down_proj": "model-00061-of-00073.safetensors",
+    "model.layers.29.mlp.experts.down_proj_bias": "model-00061-of-00073.safetensors",
+    "model.layers.29.mlp.experts.gate_up_proj": "model-00060-of-00073.safetensors",
+    "model.layers.29.mlp.experts.gate_up_proj_bias": "model-00060-of-00073.safetensors",
+    "model.layers.29.mlp.router.bias": "model-00059-of-00073.safetensors",
+    "model.layers.29.mlp.router.weight": "model-00059-of-00073.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00061-of-00073.safetensors",
+    "model.layers.29.self_attn.k_proj.bias": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.o_proj.bias": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.q_proj.bias": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.sinks": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.v_proj.bias": "model-00059-of-00073.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00059-of-00073.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00009-of-00073.safetensors",
+    "model.layers.3.mlp.experts.down_proj": "model-00009-of-00073.safetensors",
+    "model.layers.3.mlp.experts.down_proj_bias": "model-00009-of-00073.safetensors",
+    "model.layers.3.mlp.experts.gate_up_proj": "model-00008-of-00073.safetensors",
+    "model.layers.3.mlp.experts.gate_up_proj_bias": "model-00008-of-00073.safetensors",
+    "model.layers.3.mlp.router.bias": "model-00007-of-00073.safetensors",
+    "model.layers.3.mlp.router.weight": "model-00007-of-00073.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00009-of-00073.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.o_proj.bias": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.sinks": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00007-of-00073.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00007-of-00073.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00063-of-00073.safetensors",
+    "model.layers.30.mlp.experts.down_proj": "model-00063-of-00073.safetensors",
+    "model.layers.30.mlp.experts.down_proj_bias": "model-00063-of-00073.safetensors",
+    "model.layers.30.mlp.experts.gate_up_proj": "model-00062-of-00073.safetensors",
+    "model.layers.30.mlp.experts.gate_up_proj_bias": "model-00062-of-00073.safetensors",
+    "model.layers.30.mlp.router.bias": "model-00061-of-00073.safetensors",
+    "model.layers.30.mlp.router.weight": "model-00061-of-00073.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00063-of-00073.safetensors",
+    "model.layers.30.self_attn.k_proj.bias": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.o_proj.bias": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.q_proj.bias": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.sinks": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.v_proj.bias": "model-00061-of-00073.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00061-of-00073.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00065-of-00073.safetensors",
+    "model.layers.31.mlp.experts.down_proj": "model-00065-of-00073.safetensors",
+    "model.layers.31.mlp.experts.down_proj_bias": "model-00065-of-00073.safetensors",
+    "model.layers.31.mlp.experts.gate_up_proj": "model-00064-of-00073.safetensors",
+    "model.layers.31.mlp.experts.gate_up_proj_bias": "model-00064-of-00073.safetensors",
+    "model.layers.31.mlp.router.bias": "model-00063-of-00073.safetensors",
+    "model.layers.31.mlp.router.weight": "model-00063-of-00073.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00065-of-00073.safetensors",
+    "model.layers.31.self_attn.k_proj.bias": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.o_proj.bias": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.q_proj.bias": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.sinks": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.v_proj.bias": "model-00063-of-00073.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00063-of-00073.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00067-of-00073.safetensors",
+    "model.layers.32.mlp.experts.down_proj": "model-00067-of-00073.safetensors",
+    "model.layers.32.mlp.experts.down_proj_bias": "model-00067-of-00073.safetensors",
+    "model.layers.32.mlp.experts.gate_up_proj": "model-00066-of-00073.safetensors",
+    "model.layers.32.mlp.experts.gate_up_proj_bias": "model-00066-of-00073.safetensors",
+    "model.layers.32.mlp.router.bias": "model-00065-of-00073.safetensors",
+    "model.layers.32.mlp.router.weight": "model-00065-of-00073.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00067-of-00073.safetensors",
+    "model.layers.32.self_attn.k_proj.bias": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.o_proj.bias": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.q_proj.bias": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.sinks": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.v_proj.bias": "model-00065-of-00073.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00065-of-00073.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00069-of-00073.safetensors",
+    "model.layers.33.mlp.experts.down_proj": "model-00069-of-00073.safetensors",
+    "model.layers.33.mlp.experts.down_proj_bias": "model-00069-of-00073.safetensors",
+    "model.layers.33.mlp.experts.gate_up_proj": "model-00068-of-00073.safetensors",
+    "model.layers.33.mlp.experts.gate_up_proj_bias": "model-00068-of-00073.safetensors",
+    "model.layers.33.mlp.router.bias": "model-00067-of-00073.safetensors",
+    "model.layers.33.mlp.router.weight": "model-00067-of-00073.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00069-of-00073.safetensors",
+    "model.layers.33.self_attn.k_proj.bias": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.k_proj.weight": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.o_proj.bias": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.o_proj.weight": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.q_proj.bias": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.q_proj.weight": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.sinks": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.v_proj.bias": "model-00067-of-00073.safetensors",
+    "model.layers.33.self_attn.v_proj.weight": "model-00067-of-00073.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00071-of-00073.safetensors",
+    "model.layers.34.mlp.experts.down_proj": "model-00071-of-00073.safetensors",
+    "model.layers.34.mlp.experts.down_proj_bias": "model-00071-of-00073.safetensors",
+    "model.layers.34.mlp.experts.gate_up_proj": "model-00070-of-00073.safetensors",
+    "model.layers.34.mlp.experts.gate_up_proj_bias": "model-00070-of-00073.safetensors",
+    "model.layers.34.mlp.router.bias": "model-00069-of-00073.safetensors",
+    "model.layers.34.mlp.router.weight": "model-00069-of-00073.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00071-of-00073.safetensors",
+    "model.layers.34.self_attn.k_proj.bias": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.k_proj.weight": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.o_proj.bias": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.o_proj.weight": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.q_proj.bias": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.q_proj.weight": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.sinks": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.v_proj.bias": "model-00069-of-00073.safetensors",
+    "model.layers.34.self_attn.v_proj.weight": "model-00069-of-00073.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00073-of-00073.safetensors",
+    "model.layers.35.mlp.experts.down_proj": "model-00073-of-00073.safetensors",
+    "model.layers.35.mlp.experts.down_proj_bias": "model-00073-of-00073.safetensors",
+    "model.layers.35.mlp.experts.gate_up_proj": "model-00072-of-00073.safetensors",
+    "model.layers.35.mlp.experts.gate_up_proj_bias": "model-00072-of-00073.safetensors",
+    "model.layers.35.mlp.router.bias": "model-00071-of-00073.safetensors",
+    "model.layers.35.mlp.router.weight": "model-00071-of-00073.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00073-of-00073.safetensors",
+    "model.layers.35.self_attn.k_proj.bias": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.k_proj.weight": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.o_proj.bias": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.o_proj.weight": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.q_proj.bias": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.q_proj.weight": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.sinks": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.v_proj.bias": "model-00071-of-00073.safetensors",
+    "model.layers.35.self_attn.v_proj.weight": "model-00071-of-00073.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00011-of-00073.safetensors",
+    "model.layers.4.mlp.experts.down_proj": "model-00011-of-00073.safetensors",
+    "model.layers.4.mlp.experts.down_proj_bias": "model-00011-of-00073.safetensors",
+    "model.layers.4.mlp.experts.gate_up_proj": "model-00010-of-00073.safetensors",
+    "model.layers.4.mlp.experts.gate_up_proj_bias": "model-00010-of-00073.safetensors",
+    "model.layers.4.mlp.router.bias": "model-00009-of-00073.safetensors",
+    "model.layers.4.mlp.router.weight": "model-00009-of-00073.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00011-of-00073.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.o_proj.bias": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.sinks": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00009-of-00073.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00073.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00013-of-00073.safetensors",
+    "model.layers.5.mlp.experts.down_proj": "model-00013-of-00073.safetensors",
+    "model.layers.5.mlp.experts.down_proj_bias": "model-00013-of-00073.safetensors",
+    "model.layers.5.mlp.experts.gate_up_proj": "model-00012-of-00073.safetensors",
+    "model.layers.5.mlp.experts.gate_up_proj_bias": "model-00012-of-00073.safetensors",
+    "model.layers.5.mlp.router.bias": "model-00011-of-00073.safetensors",
+    "model.layers.5.mlp.router.weight": "model-00011-of-00073.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00013-of-00073.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.o_proj.bias": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.sinks": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00011-of-00073.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00073.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00015-of-00073.safetensors",
+    "model.layers.6.mlp.experts.down_proj": "model-00015-of-00073.safetensors",
+    "model.layers.6.mlp.experts.down_proj_bias": "model-00015-of-00073.safetensors",
+    "model.layers.6.mlp.experts.gate_up_proj": "model-00014-of-00073.safetensors",
+    "model.layers.6.mlp.experts.gate_up_proj_bias": "model-00014-of-00073.safetensors",
+    "model.layers.6.mlp.router.bias": "model-00013-of-00073.safetensors",
+    "model.layers.6.mlp.router.weight": "model-00013-of-00073.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00015-of-00073.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.o_proj.bias": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.sinks": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00013-of-00073.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00013-of-00073.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00017-of-00073.safetensors",
+    "model.layers.7.mlp.experts.down_proj": "model-00017-of-00073.safetensors",
+    "model.layers.7.mlp.experts.down_proj_bias": "model-00017-of-00073.safetensors",
+    "model.layers.7.mlp.experts.gate_up_proj": "model-00016-of-00073.safetensors",
+    "model.layers.7.mlp.experts.gate_up_proj_bias": "model-00016-of-00073.safetensors",
+    "model.layers.7.mlp.router.bias": "model-00015-of-00073.safetensors",
+    "model.layers.7.mlp.router.weight": "model-00015-of-00073.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00017-of-00073.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.o_proj.bias": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.sinks": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00015-of-00073.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00015-of-00073.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00019-of-00073.safetensors",
+    "model.layers.8.mlp.experts.down_proj": "model-00019-of-00073.safetensors",
+    "model.layers.8.mlp.experts.down_proj_bias": "model-00019-of-00073.safetensors",
+    "model.layers.8.mlp.experts.gate_up_proj": "model-00018-of-00073.safetensors",
+    "model.layers.8.mlp.experts.gate_up_proj_bias": "model-00018-of-00073.safetensors",
+    "model.layers.8.mlp.router.bias": "model-00017-of-00073.safetensors",
+    "model.layers.8.mlp.router.weight": "model-00017-of-00073.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00019-of-00073.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.o_proj.bias": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.sinks": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00017-of-00073.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00017-of-00073.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00021-of-00073.safetensors",
+    "model.layers.9.mlp.experts.down_proj": "model-00021-of-00073.safetensors",
+    "model.layers.9.mlp.experts.down_proj_bias": "model-00021-of-00073.safetensors",
+    "model.layers.9.mlp.experts.gate_up_proj": "model-00020-of-00073.safetensors",
+    "model.layers.9.mlp.experts.gate_up_proj_bias": "model-00020-of-00073.safetensors",
+    "model.layers.9.mlp.router.bias": "model-00019-of-00073.safetensors",
+    "model.layers.9.mlp.router.weight": "model-00019-of-00073.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00021-of-00073.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.o_proj.bias": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.sinks": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00019-of-00073.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00019-of-00073.safetensors",
+    "model.norm.weight": "model-00073-of-00073.safetensors"
+  }
+}