diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..214d0945cfd8a92842acde433f41111825e6744e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2d99f1588d77579b3afbbf6796478b3bc0d7ea58 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2025 Baidu, Inc. 
All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..726c5f39d7c9b54a54f0817635f7f4bd6516ac4d --- /dev/null +++ b/README.md @@ -0,0 +1,119 @@ +--- +license: apache-2.0 +language: +- en +- zh +pipeline_tag: image-text-to-text +tags: +- ERNIE4.5 +--- + +
+ + Chat + + + Hugging Face + + + Github + + + Blog + +
+ +
+ + License + +
+ +# ERNIE-4.5-VL-424B-A47B-Base + +## ERNIE 4.5 Highlights + +The advanced capabilities of the ERNIE 4.5 models, particularly the MoE-based A47B and A3B series, are underpinned by several key technical innovations: + +1. **Multimodal Heterogeneous MoE Pre-Training**: Our models are jointly trained on both textual and visual modalities to better capture the nuances of multimodal information and improve performance on tasks involving text generation, image understanding, and cross-modal reasoning. To achieve this without one modality hindering the learning of another, we designed a *heterogeneous MoE structure*, incorporated *modality-isolated routing*, and employed *router orthogonal loss* and *multimodal token-balanced loss*. These architectural choices ensure that both modalities are effectively represented, allowing for mutual reinforcement during training. + +2. **Scaling-Efficient Infrastructure**: We propose a novel heterogeneous hybrid parallelism and hierarchical load balancing strategy for efficient training of ERNIE 4.5 models. By using intra-node expert parallelism, memory-efficient pipeline scheduling, FP8 mixed-precision training and fine-grained recomputation methods, we achieve remarkable pre-training throughput. For inference, we propose the Multi-Expert Parallel Collaboration method and the Convolutional Code Quantization algorithm to achieve 4-bit/2-bit lossless quantization. Furthermore, we introduce PD disaggregation with dynamic role switching for effective resource utilization to enhance inference performance for ERNIE 4.5 MoE models. Built on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle), ERNIE 4.5 delivers high-performance inference across a wide range of hardware platforms. + +3. **Modality-Specific Post-training**: To meet the diverse requirements of real-world applications, we fine-tuned variants of the pretrained model for specific modalities. Our *LLMs* are optimized for general-purpose language understanding and generation. 
The *VLMs* focus on visual-language understanding and support both thinking and non-thinking modes. Each model employed a combination of *Supervised Fine-tuning (SFT)*, *Direct Preference Optimization (DPO)*, or a modified reinforcement learning method named *Unified Preference Optimization (UPO)* for post-training. + +To ensure the stability of multimodal joint training, we adopt a staged training strategy. In the first and second stages, we train only the text-related parameters, enabling the model to develop strong fundamental language understanding as well as long-text processing capabilities. The final multimodal stage extends capabilities to images and videos by introducing additional parameters including a ViT for image feature extraction, an adapter for feature transformation, and visual experts for multimodal understanding. At this stage, text and visual modalities mutually enhance each other. After pretraining on trillions of tokens, we obtained ERNIE-4.5-VL-424B-A47B-Base. + +## Model Overview + +ERNIE-4.5-VL-424B-A47B-Base is a multimodal MoE Base model, with 424B total parameters and 47B activated parameters for each token. 
The following are the model configuration details: + +| Key | Value | +| --------------------------------- | ------------- | +| Modality | Text & Vision | +| Training Stage | Pretraining | +| Params(Total / Activated) | 424B / 47B | +| Layers | 54 | +| Heads(Q/KV) | 64 / 8 | +| Text Experts(Total / Activated) | 64 / 8 | +| Vision Experts(Total / Activated) | 64 / 8 | +| Context Length | 131072 | + +## Benchmark + +| Capability | Benchmark | ERNIE-4.5-VL-424B-A47B-Base | GPT-4.1 | +| ----------------- | ------------------- | --------------------------- | ------- | +| Average | | | | +| Visual Perception | CVBench | | 82.49 | +| | CountBench | | | +| | RealWorldQA | | 77.25 | +| | VLMAreBlind | | | +| Knowledge | CCBench | | 78.65 | +| Chart&Doc&OCR | OCRBench | | 83.00 | +| | TableVQA | | 72.13 | +| | ChartQA | | 82.56 | +| | DocVQA(val) | | 87.84 | +| | ChartXiv-Reasoning | | 58.30 | +| Vision-Reasoning | VisualPuzzle | | 45.63 | +| | Logicvista | | | +| STEM | OlympiadBench | | 39.95 | +| | MathVista(testmini) | | 70.90 | +| | MathVerse | | 62.46 | +| | MMMU(val) | | 73.07 | +| | AI2D | | 95.34 | +| | MathVision | | 50.46 | +| Video | MVBench | | 64.15 | +| | VideoMME w/o subs | | 74.49 | +| | VideoMME w/ subs | | 78.90 | +| | MLVU | | 73.33 | +| | LongVideoBench | | 63.47 | + +## Quickstart + +### vLLM inference + +vLLM is currently being adapted, priority can be given to using our forked repository [vllm](https://github.com/CSWYF3634076/vllm/tree/ernie). We are working with the community to fully support ERNIE4.5 models, stay tuned. + +```bash +# 80G * 16 GPU +vllm serve baidu/ERNIE-4.5-VL-424B-A47B-Base-PT --trust-remote-code +``` + +## License + +The ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright © 2025 Baidu, Inc. All Rights Reserved. 
+ +## Citation + +If you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report: + +```bibtex +@misc{ernie2025technicalreport, + title={ERNIE 4.5 Technical Report}, + author={Baidu ERNIE Team}, + year={2025}, + eprint={}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={} +} +``` diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..583ebbde43016a17315c7bcd130c025a5b04be1a --- /dev/null +++ b/added_tokens.json @@ -0,0 +1 @@ +{"<|IMAGE_PLACEHOLDER|>": 100295, "<|AUDIO_PLACEHOLDER|>": 100296, "<|LOC_0|>": 100297, "<|LOC_1|>": 100298, "<|LOC_2|>": 100299, "<|LOC_3|>": 100300, "<|LOC_4|>": 100301, "<|LOC_5|>": 100302, "<|LOC_6|>": 100303, "<|LOC_7|>": 100304, "<|LOC_8|>": 100305, "<|LOC_9|>": 100306, "<|LOC_10|>": 100307, "<|LOC_11|>": 100308, "<|LOC_12|>": 100309, "<|LOC_13|>": 100310, "<|LOC_14|>": 100311, "<|LOC_15|>": 100312, "<|LOC_16|>": 100313, "<|LOC_17|>": 100314, "<|LOC_18|>": 100315, "<|LOC_19|>": 100316, "<|LOC_20|>": 100317, "<|LOC_21|>": 100318, "<|LOC_22|>": 100319, "<|LOC_23|>": 100320, "<|LOC_24|>": 100321, "<|LOC_25|>": 100322, "<|LOC_26|>": 100323, "<|LOC_27|>": 100324, "<|LOC_28|>": 100325, "<|LOC_29|>": 100326, "<|LOC_30|>": 100327, "<|LOC_31|>": 100328, "<|LOC_32|>": 100329, "<|LOC_33|>": 100330, "<|LOC_34|>": 100331, "<|LOC_35|>": 100332, "<|LOC_36|>": 100333, "<|LOC_37|>": 100334, "<|LOC_38|>": 100335, "<|LOC_39|>": 100336, "<|LOC_40|>": 100337, "<|LOC_41|>": 100338, "<|LOC_42|>": 100339, "<|LOC_43|>": 100340, "<|LOC_44|>": 100341, "<|LOC_45|>": 100342, "<|LOC_46|>": 100343, "<|LOC_47|>": 100344, "<|LOC_48|>": 100345, "<|LOC_49|>": 100346, "<|LOC_50|>": 100347, "<|LOC_51|>": 100348, "<|LOC_52|>": 100349, "<|LOC_53|>": 100350, "<|LOC_54|>": 100351, "<|LOC_55|>": 100352, "<|LOC_56|>": 100353, "<|LOC_57|>": 100354, "<|LOC_58|>": 100355, "<|LOC_59|>": 100356, "<|LOC_60|>": 100357, "<|LOC_61|>": 100358, "<|LOC_62|>": 100359, 
"<|LOC_63|>": 100360, "<|LOC_64|>": 100361, "<|LOC_65|>": 100362, "<|LOC_66|>": 100363, "<|LOC_67|>": 100364, "<|LOC_68|>": 100365, "<|LOC_69|>": 100366, "<|LOC_70|>": 100367, "<|LOC_71|>": 100368, "<|LOC_72|>": 100369, "<|LOC_73|>": 100370, "<|LOC_74|>": 100371, "<|LOC_75|>": 100372, "<|LOC_76|>": 100373, "<|LOC_77|>": 100374, "<|LOC_78|>": 100375, "<|LOC_79|>": 100376, "<|LOC_80|>": 100377, "<|LOC_81|>": 100378, "<|LOC_82|>": 100379, "<|LOC_83|>": 100380, "<|LOC_84|>": 100381, "<|LOC_85|>": 100382, "<|LOC_86|>": 100383, "<|LOC_87|>": 100384, "<|LOC_88|>": 100385, "<|LOC_89|>": 100386, "<|LOC_90|>": 100387, "<|LOC_91|>": 100388, "<|LOC_92|>": 100389, "<|LOC_93|>": 100390, "<|LOC_94|>": 100391, "<|LOC_95|>": 100392, "<|LOC_96|>": 100393, "<|LOC_97|>": 100394, "<|LOC_98|>": 100395, "<|LOC_99|>": 100396, "<|LOC_100|>": 100397, "<|LOC_101|>": 100398, "<|LOC_102|>": 100399, "<|LOC_103|>": 100400, "<|LOC_104|>": 100401, "<|LOC_105|>": 100402, "<|LOC_106|>": 100403, "<|LOC_107|>": 100404, "<|LOC_108|>": 100405, "<|LOC_109|>": 100406, "<|LOC_110|>": 100407, "<|LOC_111|>": 100408, "<|LOC_112|>": 100409, "<|LOC_113|>": 100410, "<|LOC_114|>": 100411, "<|LOC_115|>": 100412, "<|LOC_116|>": 100413, "<|LOC_117|>": 100414, "<|LOC_118|>": 100415, "<|LOC_119|>": 100416, "<|LOC_120|>": 100417, "<|LOC_121|>": 100418, "<|LOC_122|>": 100419, "<|LOC_123|>": 100420, "<|LOC_124|>": 100421, "<|LOC_125|>": 100422, "<|LOC_126|>": 100423, "<|LOC_127|>": 100424, "<|LOC_128|>": 100425, "<|LOC_129|>": 100426, "<|LOC_130|>": 100427, "<|LOC_131|>": 100428, "<|LOC_132|>": 100429, "<|LOC_133|>": 100430, "<|LOC_134|>": 100431, "<|LOC_135|>": 100432, "<|LOC_136|>": 100433, "<|LOC_137|>": 100434, "<|LOC_138|>": 100435, "<|LOC_139|>": 100436, "<|LOC_140|>": 100437, "<|LOC_141|>": 100438, "<|LOC_142|>": 100439, "<|LOC_143|>": 100440, "<|LOC_144|>": 100441, "<|LOC_145|>": 100442, "<|LOC_146|>": 100443, "<|LOC_147|>": 100444, "<|LOC_148|>": 100445, "<|LOC_149|>": 100446, "<|LOC_150|>": 100447, 
"<|LOC_151|>": 100448, "<|LOC_152|>": 100449, "<|LOC_153|>": 100450, "<|LOC_154|>": 100451, "<|LOC_155|>": 100452, "<|LOC_156|>": 100453, "<|LOC_157|>": 100454, "<|LOC_158|>": 100455, "<|LOC_159|>": 100456, "<|LOC_160|>": 100457, "<|LOC_161|>": 100458, "<|LOC_162|>": 100459, "<|LOC_163|>": 100460, "<|LOC_164|>": 100461, "<|LOC_165|>": 100462, "<|LOC_166|>": 100463, "<|LOC_167|>": 100464, "<|LOC_168|>": 100465, "<|LOC_169|>": 100466, "<|LOC_170|>": 100467, "<|LOC_171|>": 100468, "<|LOC_172|>": 100469, "<|LOC_173|>": 100470, "<|LOC_174|>": 100471, "<|LOC_175|>": 100472, "<|LOC_176|>": 100473, "<|LOC_177|>": 100474, "<|LOC_178|>": 100475, "<|LOC_179|>": 100476, "<|LOC_180|>": 100477, "<|LOC_181|>": 100478, "<|LOC_182|>": 100479, "<|LOC_183|>": 100480, "<|LOC_184|>": 100481, "<|LOC_185|>": 100482, "<|LOC_186|>": 100483, "<|LOC_187|>": 100484, "<|LOC_188|>": 100485, "<|LOC_189|>": 100486, "<|LOC_190|>": 100487, "<|LOC_191|>": 100488, "<|LOC_192|>": 100489, "<|LOC_193|>": 100490, "<|LOC_194|>": 100491, "<|LOC_195|>": 100492, "<|LOC_196|>": 100493, "<|LOC_197|>": 100494, "<|LOC_198|>": 100495, "<|LOC_199|>": 100496, "<|LOC_200|>": 100497, "<|LOC_201|>": 100498, "<|LOC_202|>": 100499, "<|LOC_203|>": 100500, "<|LOC_204|>": 100501, "<|LOC_205|>": 100502, "<|LOC_206|>": 100503, "<|LOC_207|>": 100504, "<|LOC_208|>": 100505, "<|LOC_209|>": 100506, "<|LOC_210|>": 100507, "<|LOC_211|>": 100508, "<|LOC_212|>": 100509, "<|LOC_213|>": 100510, "<|LOC_214|>": 100511, "<|LOC_215|>": 100512, "<|LOC_216|>": 100513, "<|LOC_217|>": 100514, "<|LOC_218|>": 100515, "<|LOC_219|>": 100516, "<|LOC_220|>": 100517, "<|LOC_221|>": 100518, "<|LOC_222|>": 100519, "<|LOC_223|>": 100520, "<|LOC_224|>": 100521, "<|LOC_225|>": 100522, "<|LOC_226|>": 100523, "<|LOC_227|>": 100524, "<|LOC_228|>": 100525, "<|LOC_229|>": 100526, "<|LOC_230|>": 100527, "<|LOC_231|>": 100528, "<|LOC_232|>": 100529, "<|LOC_233|>": 100530, "<|LOC_234|>": 100531, "<|LOC_235|>": 100532, "<|LOC_236|>": 100533, "<|LOC_237|>": 
100534, "<|LOC_238|>": 100535, "<|LOC_239|>": 100536, "<|LOC_240|>": 100537, "<|LOC_241|>": 100538, "<|LOC_242|>": 100539, "<|LOC_243|>": 100540, "<|LOC_244|>": 100541, "<|LOC_245|>": 100542, "<|LOC_246|>": 100543, "<|LOC_247|>": 100544, "<|LOC_248|>": 100545, "<|LOC_249|>": 100546, "<|LOC_250|>": 100547, "<|LOC_251|>": 100548, "<|LOC_252|>": 100549, "<|LOC_253|>": 100550, "<|LOC_254|>": 100551, "<|LOC_255|>": 100552, "<|LOC_256|>": 100553, "<|LOC_257|>": 100554, "<|LOC_258|>": 100555, "<|LOC_259|>": 100556, "<|LOC_260|>": 100557, "<|LOC_261|>": 100558, "<|LOC_262|>": 100559, "<|LOC_263|>": 100560, "<|LOC_264|>": 100561, "<|LOC_265|>": 100562, "<|LOC_266|>": 100563, "<|LOC_267|>": 100564, "<|LOC_268|>": 100565, "<|LOC_269|>": 100566, "<|LOC_270|>": 100567, "<|LOC_271|>": 100568, "<|LOC_272|>": 100569, "<|LOC_273|>": 100570, "<|LOC_274|>": 100571, "<|LOC_275|>": 100572, "<|LOC_276|>": 100573, "<|LOC_277|>": 100574, "<|LOC_278|>": 100575, "<|LOC_279|>": 100576, "<|LOC_280|>": 100577, "<|LOC_281|>": 100578, "<|LOC_282|>": 100579, "<|LOC_283|>": 100580, "<|LOC_284|>": 100581, "<|LOC_285|>": 100582, "<|LOC_286|>": 100583, "<|LOC_287|>": 100584, "<|LOC_288|>": 100585, "<|LOC_289|>": 100586, "<|LOC_290|>": 100587, "<|LOC_291|>": 100588, "<|LOC_292|>": 100589, "<|LOC_293|>": 100590, "<|LOC_294|>": 100591, "<|LOC_295|>": 100592, "<|LOC_296|>": 100593, "<|LOC_297|>": 100594, "<|LOC_298|>": 100595, "<|LOC_299|>": 100596, "<|LOC_300|>": 100597, "<|LOC_301|>": 100598, "<|LOC_302|>": 100599, "<|LOC_303|>": 100600, "<|LOC_304|>": 100601, "<|LOC_305|>": 100602, "<|LOC_306|>": 100603, "<|LOC_307|>": 100604, "<|LOC_308|>": 100605, "<|LOC_309|>": 100606, "<|LOC_310|>": 100607, "<|LOC_311|>": 100608, "<|LOC_312|>": 100609, "<|LOC_313|>": 100610, "<|LOC_314|>": 100611, "<|LOC_315|>": 100612, "<|LOC_316|>": 100613, "<|LOC_317|>": 100614, "<|LOC_318|>": 100615, "<|LOC_319|>": 100616, "<|LOC_320|>": 100617, "<|LOC_321|>": 100618, "<|LOC_322|>": 100619, "<|LOC_323|>": 100620, 
"<|LOC_324|>": 100621, "<|LOC_325|>": 100622, "<|LOC_326|>": 100623, "<|LOC_327|>": 100624, "<|LOC_328|>": 100625, "<|LOC_329|>": 100626, "<|LOC_330|>": 100627, "<|LOC_331|>": 100628, "<|LOC_332|>": 100629, "<|LOC_333|>": 100630, "<|LOC_334|>": 100631, "<|LOC_335|>": 100632, "<|LOC_336|>": 100633, "<|LOC_337|>": 100634, "<|LOC_338|>": 100635, "<|LOC_339|>": 100636, "<|LOC_340|>": 100637, "<|LOC_341|>": 100638, "<|LOC_342|>": 100639, "<|LOC_343|>": 100640, "<|LOC_344|>": 100641, "<|LOC_345|>": 100642, "<|LOC_346|>": 100643, "<|LOC_347|>": 100644, "<|LOC_348|>": 100645, "<|LOC_349|>": 100646, "<|LOC_350|>": 100647, "<|LOC_351|>": 100648, "<|LOC_352|>": 100649, "<|LOC_353|>": 100650, "<|LOC_354|>": 100651, "<|LOC_355|>": 100652, "<|LOC_356|>": 100653, "<|LOC_357|>": 100654, "<|LOC_358|>": 100655, "<|LOC_359|>": 100656, "<|LOC_360|>": 100657, "<|LOC_361|>": 100658, "<|LOC_362|>": 100659, "<|LOC_363|>": 100660, "<|LOC_364|>": 100661, "<|LOC_365|>": 100662, "<|LOC_366|>": 100663, "<|LOC_367|>": 100664, "<|LOC_368|>": 100665, "<|LOC_369|>": 100666, "<|LOC_370|>": 100667, "<|LOC_371|>": 100668, "<|LOC_372|>": 100669, "<|LOC_373|>": 100670, "<|LOC_374|>": 100671, "<|LOC_375|>": 100672, "<|LOC_376|>": 100673, "<|LOC_377|>": 100674, "<|LOC_378|>": 100675, "<|LOC_379|>": 100676, "<|LOC_380|>": 100677, "<|LOC_381|>": 100678, "<|LOC_382|>": 100679, "<|LOC_383|>": 100680, "<|LOC_384|>": 100681, "<|LOC_385|>": 100682, "<|LOC_386|>": 100683, "<|LOC_387|>": 100684, "<|LOC_388|>": 100685, "<|LOC_389|>": 100686, "<|LOC_390|>": 100687, "<|LOC_391|>": 100688, "<|LOC_392|>": 100689, "<|LOC_393|>": 100690, "<|LOC_394|>": 100691, "<|LOC_395|>": 100692, "<|LOC_396|>": 100693, "<|LOC_397|>": 100694, "<|LOC_398|>": 100695, "<|LOC_399|>": 100696, "<|LOC_400|>": 100697, "<|LOC_401|>": 100698, "<|LOC_402|>": 100699, "<|LOC_403|>": 100700, "<|LOC_404|>": 100701, "<|LOC_405|>": 100702, "<|LOC_406|>": 100703, "<|LOC_407|>": 100704, "<|LOC_408|>": 100705, "<|LOC_409|>": 100706, "<|LOC_410|>": 
100707, "<|LOC_411|>": 100708, "<|LOC_412|>": 100709, "<|LOC_413|>": 100710, "<|LOC_414|>": 100711, "<|LOC_415|>": 100712, "<|LOC_416|>": 100713, "<|LOC_417|>": 100714, "<|LOC_418|>": 100715, "<|LOC_419|>": 100716, "<|LOC_420|>": 100717, "<|LOC_421|>": 100718, "<|LOC_422|>": 100719, "<|LOC_423|>": 100720, "<|LOC_424|>": 100721, "<|LOC_425|>": 100722, "<|LOC_426|>": 100723, "<|LOC_427|>": 100724, "<|LOC_428|>": 100725, "<|LOC_429|>": 100726, "<|LOC_430|>": 100727, "<|LOC_431|>": 100728, "<|LOC_432|>": 100729, "<|LOC_433|>": 100730, "<|LOC_434|>": 100731, "<|LOC_435|>": 100732, "<|LOC_436|>": 100733, "<|LOC_437|>": 100734, "<|LOC_438|>": 100735, "<|LOC_439|>": 100736, "<|LOC_440|>": 100737, "<|LOC_441|>": 100738, "<|LOC_442|>": 100739, "<|LOC_443|>": 100740, "<|LOC_444|>": 100741, "<|LOC_445|>": 100742, "<|LOC_446|>": 100743, "<|LOC_447|>": 100744, "<|LOC_448|>": 100745, "<|LOC_449|>": 100746, "<|LOC_450|>": 100747, "<|LOC_451|>": 100748, "<|LOC_452|>": 100749, "<|LOC_453|>": 100750, "<|LOC_454|>": 100751, "<|LOC_455|>": 100752, "<|LOC_456|>": 100753, "<|LOC_457|>": 100754, "<|LOC_458|>": 100755, "<|LOC_459|>": 100756, "<|LOC_460|>": 100757, "<|LOC_461|>": 100758, "<|LOC_462|>": 100759, "<|LOC_463|>": 100760, "<|LOC_464|>": 100761, "<|LOC_465|>": 100762, "<|LOC_466|>": 100763, "<|LOC_467|>": 100764, "<|LOC_468|>": 100765, "<|LOC_469|>": 100766, "<|LOC_470|>": 100767, "<|LOC_471|>": 100768, "<|LOC_472|>": 100769, "<|LOC_473|>": 100770, "<|LOC_474|>": 100771, "<|LOC_475|>": 100772, "<|LOC_476|>": 100773, "<|LOC_477|>": 100774, "<|LOC_478|>": 100775, "<|LOC_479|>": 100776, "<|LOC_480|>": 100777, "<|LOC_481|>": 100778, "<|LOC_482|>": 100779, "<|LOC_483|>": 100780, "<|LOC_484|>": 100781, "<|LOC_485|>": 100782, "<|LOC_486|>": 100783, "<|LOC_487|>": 100784, "<|LOC_488|>": 100785, "<|LOC_489|>": 100786, "<|LOC_490|>": 100787, "<|LOC_491|>": 100788, "<|LOC_492|>": 100789, "<|LOC_493|>": 100790, "<|LOC_494|>": 100791, "<|LOC_495|>": 100792, "<|LOC_496|>": 100793, 
"<|LOC_497|>": 100794, "<|LOC_498|>": 100795, "<|LOC_499|>": 100796, "<|LOC_500|>": 100797, "<|LOC_501|>": 100798, "<|LOC_502|>": 100799, "<|LOC_503|>": 100800, "<|LOC_504|>": 100801, "<|LOC_505|>": 100802, "<|LOC_506|>": 100803, "<|LOC_507|>": 100804, "<|LOC_508|>": 100805, "<|LOC_509|>": 100806, "<|LOC_510|>": 100807, "<|LOC_511|>": 100808, "<|LOC_512|>": 100809, "<|LOC_513|>": 100810, "<|LOC_514|>": 100811, "<|LOC_515|>": 100812, "<|LOC_516|>": 100813, "<|LOC_517|>": 100814, "<|LOC_518|>": 100815, "<|LOC_519|>": 100816, "<|LOC_520|>": 100817, "<|LOC_521|>": 100818, "<|LOC_522|>": 100819, "<|LOC_523|>": 100820, "<|LOC_524|>": 100821, "<|LOC_525|>": 100822, "<|LOC_526|>": 100823, "<|LOC_527|>": 100824, "<|LOC_528|>": 100825, "<|LOC_529|>": 100826, "<|LOC_530|>": 100827, "<|LOC_531|>": 100828, "<|LOC_532|>": 100829, "<|LOC_533|>": 100830, "<|LOC_534|>": 100831, "<|LOC_535|>": 100832, "<|LOC_536|>": 100833, "<|LOC_537|>": 100834, "<|LOC_538|>": 100835, "<|LOC_539|>": 100836, "<|LOC_540|>": 100837, "<|LOC_541|>": 100838, "<|LOC_542|>": 100839, "<|LOC_543|>": 100840, "<|LOC_544|>": 100841, "<|LOC_545|>": 100842, "<|LOC_546|>": 100843, "<|LOC_547|>": 100844, "<|LOC_548|>": 100845, "<|LOC_549|>": 100846, "<|LOC_550|>": 100847, "<|LOC_551|>": 100848, "<|LOC_552|>": 100849, "<|LOC_553|>": 100850, "<|LOC_554|>": 100851, "<|LOC_555|>": 100852, "<|LOC_556|>": 100853, "<|LOC_557|>": 100854, "<|LOC_558|>": 100855, "<|LOC_559|>": 100856, "<|LOC_560|>": 100857, "<|LOC_561|>": 100858, "<|LOC_562|>": 100859, "<|LOC_563|>": 100860, "<|LOC_564|>": 100861, "<|LOC_565|>": 100862, "<|LOC_566|>": 100863, "<|LOC_567|>": 100864, "<|LOC_568|>": 100865, "<|LOC_569|>": 100866, "<|LOC_570|>": 100867, "<|LOC_571|>": 100868, "<|LOC_572|>": 100869, "<|LOC_573|>": 100870, "<|LOC_574|>": 100871, "<|LOC_575|>": 100872, "<|LOC_576|>": 100873, "<|LOC_577|>": 100874, "<|LOC_578|>": 100875, "<|LOC_579|>": 100876, "<|LOC_580|>": 100877, "<|LOC_581|>": 100878, "<|LOC_582|>": 100879, "<|LOC_583|>": 
100880, "<|LOC_584|>": 100881, "<|LOC_585|>": 100882, "<|LOC_586|>": 100883, "<|LOC_587|>": 100884, "<|LOC_588|>": 100885, "<|LOC_589|>": 100886, "<|LOC_590|>": 100887, "<|LOC_591|>": 100888, "<|LOC_592|>": 100889, "<|LOC_593|>": 100890, "<|LOC_594|>": 100891, "<|LOC_595|>": 100892, "<|LOC_596|>": 100893, "<|LOC_597|>": 100894, "<|LOC_598|>": 100895, "<|LOC_599|>": 100896, "<|LOC_600|>": 100897, "<|LOC_601|>": 100898, "<|LOC_602|>": 100899, "<|LOC_603|>": 100900, "<|LOC_604|>": 100901, "<|LOC_605|>": 100902, "<|LOC_606|>": 100903, "<|LOC_607|>": 100904, "<|LOC_608|>": 100905, "<|LOC_609|>": 100906, "<|LOC_610|>": 100907, "<|LOC_611|>": 100908, "<|LOC_612|>": 100909, "<|LOC_613|>": 100910, "<|LOC_614|>": 100911, "<|LOC_615|>": 100912, "<|LOC_616|>": 100913, "<|LOC_617|>": 100914, "<|LOC_618|>": 100915, "<|LOC_619|>": 100916, "<|LOC_620|>": 100917, "<|LOC_621|>": 100918, "<|LOC_622|>": 100919, "<|LOC_623|>": 100920, "<|LOC_624|>": 100921, "<|LOC_625|>": 100922, "<|LOC_626|>": 100923, "<|LOC_627|>": 100924, "<|LOC_628|>": 100925, "<|LOC_629|>": 100926, "<|LOC_630|>": 100927, "<|LOC_631|>": 100928, "<|LOC_632|>": 100929, "<|LOC_633|>": 100930, "<|LOC_634|>": 100931, "<|LOC_635|>": 100932, "<|LOC_636|>": 100933, "<|LOC_637|>": 100934, "<|LOC_638|>": 100935, "<|LOC_639|>": 100936, "<|LOC_640|>": 100937, "<|LOC_641|>": 100938, "<|LOC_642|>": 100939, "<|LOC_643|>": 100940, "<|LOC_644|>": 100941, "<|LOC_645|>": 100942, "<|LOC_646|>": 100943, "<|LOC_647|>": 100944, "<|LOC_648|>": 100945, "<|LOC_649|>": 100946, "<|LOC_650|>": 100947, "<|LOC_651|>": 100948, "<|LOC_652|>": 100949, "<|LOC_653|>": 100950, "<|LOC_654|>": 100951, "<|LOC_655|>": 100952, "<|LOC_656|>": 100953, "<|LOC_657|>": 100954, "<|LOC_658|>": 100955, "<|LOC_659|>": 100956, "<|LOC_660|>": 100957, "<|LOC_661|>": 100958, "<|LOC_662|>": 100959, "<|LOC_663|>": 100960, "<|LOC_664|>": 100961, "<|LOC_665|>": 100962, "<|LOC_666|>": 100963, "<|LOC_667|>": 100964, "<|LOC_668|>": 100965, "<|LOC_669|>": 100966, 
"<|LOC_670|>": 100967, "<|LOC_671|>": 100968, "<|LOC_672|>": 100969, "<|LOC_673|>": 100970, "<|LOC_674|>": 100971, "<|LOC_675|>": 100972, "<|LOC_676|>": 100973, "<|LOC_677|>": 100974, "<|LOC_678|>": 100975, "<|LOC_679|>": 100976, "<|LOC_680|>": 100977, "<|LOC_681|>": 100978, "<|LOC_682|>": 100979, "<|LOC_683|>": 100980, "<|LOC_684|>": 100981, "<|LOC_685|>": 100982, "<|LOC_686|>": 100983, "<|LOC_687|>": 100984, "<|LOC_688|>": 100985, "<|LOC_689|>": 100986, "<|LOC_690|>": 100987, "<|LOC_691|>": 100988, "<|LOC_692|>": 100989, "<|LOC_693|>": 100990, "<|LOC_694|>": 100991, "<|LOC_695|>": 100992, "<|LOC_696|>": 100993, "<|LOC_697|>": 100994, "<|LOC_698|>": 100995, "<|LOC_699|>": 100996, "<|LOC_700|>": 100997, "<|LOC_701|>": 100998, "<|LOC_702|>": 100999, "<|LOC_703|>": 101000, "<|LOC_704|>": 101001, "<|LOC_705|>": 101002, "<|LOC_706|>": 101003, "<|LOC_707|>": 101004, "<|LOC_708|>": 101005, "<|LOC_709|>": 101006, "<|LOC_710|>": 101007, "<|LOC_711|>": 101008, "<|LOC_712|>": 101009, "<|LOC_713|>": 101010, "<|LOC_714|>": 101011, "<|LOC_715|>": 101012, "<|LOC_716|>": 101013, "<|LOC_717|>": 101014, "<|LOC_718|>": 101015, "<|LOC_719|>": 101016, "<|LOC_720|>": 101017, "<|LOC_721|>": 101018, "<|LOC_722|>": 101019, "<|LOC_723|>": 101020, "<|LOC_724|>": 101021, "<|LOC_725|>": 101022, "<|LOC_726|>": 101023, "<|LOC_727|>": 101024, "<|LOC_728|>": 101025, "<|LOC_729|>": 101026, "<|LOC_730|>": 101027, "<|LOC_731|>": 101028, "<|LOC_732|>": 101029, "<|LOC_733|>": 101030, "<|LOC_734|>": 101031, "<|LOC_735|>": 101032, "<|LOC_736|>": 101033, "<|LOC_737|>": 101034, "<|LOC_738|>": 101035, "<|LOC_739|>": 101036, "<|LOC_740|>": 101037, "<|LOC_741|>": 101038, "<|LOC_742|>": 101039, "<|LOC_743|>": 101040, "<|LOC_744|>": 101041, "<|LOC_745|>": 101042, "<|LOC_746|>": 101043, "<|LOC_747|>": 101044, "<|LOC_748|>": 101045, "<|LOC_749|>": 101046, "<|LOC_750|>": 101047, "<|LOC_751|>": 101048, "<|LOC_752|>": 101049, "<|LOC_753|>": 101050, "<|LOC_754|>": 101051, "<|LOC_755|>": 101052, "<|LOC_756|>": 
101053, "<|LOC_757|>": 101054, "<|LOC_758|>": 101055, "<|LOC_759|>": 101056, "<|LOC_760|>": 101057, "<|LOC_761|>": 101058, "<|LOC_762|>": 101059, "<|LOC_763|>": 101060, "<|LOC_764|>": 101061, "<|LOC_765|>": 101062, "<|LOC_766|>": 101063, "<|LOC_767|>": 101064, "<|LOC_768|>": 101065, "<|LOC_769|>": 101066, "<|LOC_770|>": 101067, "<|LOC_771|>": 101068, "<|LOC_772|>": 101069, "<|LOC_773|>": 101070, "<|LOC_774|>": 101071, "<|LOC_775|>": 101072, "<|LOC_776|>": 101073, "<|LOC_777|>": 101074, "<|LOC_778|>": 101075, "<|LOC_779|>": 101076, "<|LOC_780|>": 101077, "<|LOC_781|>": 101078, "<|LOC_782|>": 101079, "<|LOC_783|>": 101080, "<|LOC_784|>": 101081, "<|LOC_785|>": 101082, "<|LOC_786|>": 101083, "<|LOC_787|>": 101084, "<|LOC_788|>": 101085, "<|LOC_789|>": 101086, "<|LOC_790|>": 101087, "<|LOC_791|>": 101088, "<|LOC_792|>": 101089, "<|LOC_793|>": 101090, "<|LOC_794|>": 101091, "<|LOC_795|>": 101092, "<|LOC_796|>": 101093, "<|LOC_797|>": 101094, "<|LOC_798|>": 101095, "<|LOC_799|>": 101096, "<|LOC_800|>": 101097, "<|LOC_801|>": 101098, "<|LOC_802|>": 101099, "<|LOC_803|>": 101100, "<|LOC_804|>": 101101, "<|LOC_805|>": 101102, "<|LOC_806|>": 101103, "<|LOC_807|>": 101104, "<|LOC_808|>": 101105, "<|LOC_809|>": 101106, "<|LOC_810|>": 101107, "<|LOC_811|>": 101108, "<|LOC_812|>": 101109, "<|LOC_813|>": 101110, "<|LOC_814|>": 101111, "<|LOC_815|>": 101112, "<|LOC_816|>": 101113, "<|LOC_817|>": 101114, "<|LOC_818|>": 101115, "<|LOC_819|>": 101116, "<|LOC_820|>": 101117, "<|LOC_821|>": 101118, "<|LOC_822|>": 101119, "<|LOC_823|>": 101120, "<|LOC_824|>": 101121, "<|LOC_825|>": 101122, "<|LOC_826|>": 101123, "<|LOC_827|>": 101124, "<|LOC_828|>": 101125, "<|LOC_829|>": 101126, "<|LOC_830|>": 101127, "<|LOC_831|>": 101128, "<|LOC_832|>": 101129, "<|LOC_833|>": 101130, "<|LOC_834|>": 101131, "<|LOC_835|>": 101132, "<|LOC_836|>": 101133, "<|LOC_837|>": 101134, "<|LOC_838|>": 101135, "<|LOC_839|>": 101136, "<|LOC_840|>": 101137, "<|LOC_841|>": 101138, "<|LOC_842|>": 101139, 
"<|LOC_843|>": 101140, "<|LOC_844|>": 101141, "<|LOC_845|>": 101142, "<|LOC_846|>": 101143, "<|LOC_847|>": 101144, "<|LOC_848|>": 101145, "<|LOC_849|>": 101146, "<|LOC_850|>": 101147, "<|LOC_851|>": 101148, "<|LOC_852|>": 101149, "<|LOC_853|>": 101150, "<|LOC_854|>": 101151, "<|LOC_855|>": 101152, "<|LOC_856|>": 101153, "<|LOC_857|>": 101154, "<|LOC_858|>": 101155, "<|LOC_859|>": 101156, "<|LOC_860|>": 101157, "<|LOC_861|>": 101158, "<|LOC_862|>": 101159, "<|LOC_863|>": 101160, "<|LOC_864|>": 101161, "<|LOC_865|>": 101162, "<|LOC_866|>": 101163, "<|LOC_867|>": 101164, "<|LOC_868|>": 101165, "<|LOC_869|>": 101166, "<|LOC_870|>": 101167, "<|LOC_871|>": 101168, "<|LOC_872|>": 101169, "<|LOC_873|>": 101170, "<|LOC_874|>": 101171, "<|LOC_875|>": 101172, "<|LOC_876|>": 101173, "<|LOC_877|>": 101174, "<|LOC_878|>": 101175, "<|LOC_879|>": 101176, "<|LOC_880|>": 101177, "<|LOC_881|>": 101178, "<|LOC_882|>": 101179, "<|LOC_883|>": 101180, "<|LOC_884|>": 101181, "<|LOC_885|>": 101182, "<|LOC_886|>": 101183, "<|LOC_887|>": 101184, "<|LOC_888|>": 101185, "<|LOC_889|>": 101186, "<|LOC_890|>": 101187, "<|LOC_891|>": 101188, "<|LOC_892|>": 101189, "<|LOC_893|>": 101190, "<|LOC_894|>": 101191, "<|LOC_895|>": 101192, "<|LOC_896|>": 101193, "<|LOC_897|>": 101194, "<|LOC_898|>": 101195, "<|LOC_899|>": 101196, "<|LOC_900|>": 101197, "<|LOC_901|>": 101198, "<|LOC_902|>": 101199, "<|LOC_903|>": 101200, "<|LOC_904|>": 101201, "<|LOC_905|>": 101202, "<|LOC_906|>": 101203, "<|LOC_907|>": 101204, "<|LOC_908|>": 101205, "<|LOC_909|>": 101206, "<|LOC_910|>": 101207, "<|LOC_911|>": 101208, "<|LOC_912|>": 101209, "<|LOC_913|>": 101210, "<|LOC_914|>": 101211, "<|LOC_915|>": 101212, "<|LOC_916|>": 101213, "<|LOC_917|>": 101214, "<|LOC_918|>": 101215, "<|LOC_919|>": 101216, "<|LOC_920|>": 101217, "<|LOC_921|>": 101218, "<|LOC_922|>": 101219, "<|LOC_923|>": 101220, "<|LOC_924|>": 101221, "<|LOC_925|>": 101222, "<|LOC_926|>": 101223, "<|LOC_927|>": 101224, "<|LOC_928|>": 101225, "<|LOC_929|>": 
101226, "<|LOC_930|>": 101227, "<|LOC_931|>": 101228, "<|LOC_932|>": 101229, "<|LOC_933|>": 101230, "<|LOC_934|>": 101231, "<|LOC_935|>": 101232, "<|LOC_936|>": 101233, "<|LOC_937|>": 101234, "<|LOC_938|>": 101235, "<|LOC_939|>": 101236, "<|LOC_940|>": 101237, "<|LOC_941|>": 101238, "<|LOC_942|>": 101239, "<|LOC_943|>": 101240, "<|LOC_944|>": 101241, "<|LOC_945|>": 101242, "<|LOC_946|>": 101243, "<|LOC_947|>": 101244, "<|LOC_948|>": 101245, "<|LOC_949|>": 101246, "<|LOC_950|>": 101247, "<|LOC_951|>": 101248, "<|LOC_952|>": 101249, "<|LOC_953|>": 101250, "<|LOC_954|>": 101251, "<|LOC_955|>": 101252, "<|LOC_956|>": 101253, "<|LOC_957|>": 101254, "<|LOC_958|>": 101255, "<|LOC_959|>": 101256, "<|LOC_960|>": 101257, "<|LOC_961|>": 101258, "<|LOC_962|>": 101259, "<|LOC_963|>": 101260, "<|LOC_964|>": 101261, "<|LOC_965|>": 101262, "<|LOC_966|>": 101263, "<|LOC_967|>": 101264, "<|LOC_968|>": 101265, "<|LOC_969|>": 101266, "<|LOC_970|>": 101267, "<|LOC_971|>": 101268, "<|LOC_972|>": 101269, "<|LOC_973|>": 101270, "<|LOC_974|>": 101271, "<|LOC_975|>": 101272, "<|LOC_976|>": 101273, "<|LOC_977|>": 101274, "<|LOC_978|>": 101275, "<|LOC_979|>": 101276, "<|LOC_980|>": 101277, "<|LOC_981|>": 101278, "<|LOC_982|>": 101279, "<|LOC_983|>": 101280, "<|LOC_984|>": 101281, "<|LOC_985|>": 101282, "<|LOC_986|>": 101283, "<|LOC_987|>": 101284, "<|LOC_988|>": 101285, "<|LOC_989|>": 101286, "<|LOC_990|>": 101287, "<|LOC_991|>": 101288, "<|LOC_992|>": 101289, "<|LOC_993|>": 101290, "<|LOC_994|>": 101291, "<|LOC_995|>": 101292, "<|LOC_996|>": 101293, "<|LOC_997|>": 101294, "<|LOC_998|>": 101295, "<|LOC_999|>": 101296, "<|LOC_1000|>": 101297, "<|LOC_BEGIN|>": 101298, "<|LOC_END|>": 101299, "<|LOC_SEP|>": 101300, "<|CROP_COL_SEP|>": 101301, "<|CROP_ROW_SEP|>": 101302, "<|IMAGE_SEP|>": 101303, "<|IMAGE_START|>": 101304, "<|IMAGE_END|>": 101305, "<|VIDEO_START|>": 101306, "<|VIDEO_END|>": 101307, "<|ASR_START|>": 101308, "<|ASR_END|>": 101309, "<|IMAGE_UNUSE:6|>": 101310, "<|IMAGE_UNUSE:7|>": 
101311, "<|IMAGE_UNUSE:8|>": 101312, "<|IMAGE_UNUSE:9|>": 101313, "<|IMAGE_UNUSE:10|>": 101314, "<|IMAGE_UNUSE:11|>": 101315, "<|IMAGE_UNUSE:12|>": 101316, "<|IMAGE_UNUSE:13|>": 101317, "<|IMAGE_UNUSE:14|>": 101318, "<|IMAGE_UNUSE:15|>": 101319, "<|IMAGE_UNUSE:16|>": 101320, "<|IMAGE_UNUSE:17|>": 101321, "<|IMAGE_UNUSE:18|>": 101322, "<|IMAGE_UNUSE:19|>": 101323, "<|IMAGE_UNUSE:20|>": 101324, "<|IMAGE_UNUSE:21|>": 101325, "<|IMAGE_UNUSE:22|>": 101326, "<|IMAGE_UNUSE:23|>": 101327, "<|IMAGE_UNUSE:24|>": 101328, "<|IMAGE_UNUSE:25|>": 101329, "<|IMAGE_UNUSE:26|>": 101330, "<|IMAGE_UNUSE:27|>": 101331, "<|IMAGE_UNUSE:28|>": 101332, "<|IMAGE_UNUSE:29|>": 101333, "<|IMAGE_UNUSE:30|>": 101334, "<|IMAGE_UNUSE:31|>": 101335, "<|IMAGE_UNUSE:32|>": 101336, "<|IMAGE_UNUSE:33|>": 101337, "<|IMAGE_UNUSE:34|>": 101338, "<|IMAGE_UNUSE:35|>": 101339, "<|IMAGE_UNUSE:36|>": 101340, "<|IMAGE_UNUSE:37|>": 101341, "<|IMAGE_UNUSE:38|>": 101342, "<|IMAGE_UNUSE:39|>": 101343, "<|IMAGE_UNUSE:40|>": 101344, "<|IMAGE_UNUSE:41|>": 101345, "<|IMAGE_UNUSE:42|>": 101346, "<|IMAGE_UNUSE:43|>": 101347, "<|IMAGE_UNUSE:44|>": 101348, "<|IMAGE_UNUSE:45|>": 101349, "<|IMAGE_UNUSE:46|>": 101350, "<|IMAGE_UNUSE:47|>": 101351, "<|IMAGE_UNUSE:48|>": 101352, "<|IMAGE_UNUSE:49|>": 101353, "<|IMAGE_UNUSE:50|>": 101354, "<|IMAGE_UNUSE:51|>": 101355, "<|IMAGE_UNUSE:52|>": 101356, "<|IMAGE_UNUSE:53|>": 101357, "<|IMAGE_UNUSE:54|>": 101358, "<|IMAGE_UNUSE:55|>": 101359, "<|IMAGE_UNUSE:56|>": 101360, "<|IMAGE_UNUSE:57|>": 101361, "<|IMAGE_UNUSE:58|>": 101362, "<|IMAGE_UNUSE:59|>": 101363, "<|IMAGE_UNUSE:60|>": 101364, "<|IMAGE_UNUSE:61|>": 101365, "<|IMAGE_UNUSE:62|>": 101366, "<|IMAGE_UNUSE:63|>": 101367, "<|IMAGE_UNUSE:64|>": 101368, "<|IMAGE_UNUSE:65|>": 101369, "<|IMAGE_UNUSE:66|>": 101370, "<|IMAGE_UNUSE:67|>": 101371, "<|IMAGE_UNUSE:68|>": 101372, "<|IMAGE_UNUSE:69|>": 101373, "<|IMAGE_UNUSE:70|>": 101374, "<|IMAGE_UNUSE:71|>": 101375, "<|IMAGE_UNUSE:72|>": 101376, "<|IMAGE_UNUSE:73|>": 101377, 
"<|IMAGE_UNUSE:74|>": 101378, "<|IMAGE_UNUSE:75|>": 101379, "<|IMAGE_UNUSE:76|>": 101380, "<|IMAGE_UNUSE:77|>": 101381, "<|IMAGE_UNUSE:78|>": 101382, "<|IMAGE_UNUSE:79|>": 101383, "<|IMAGE_UNUSE:80|>": 101384, "<|IMAGE_UNUSE:81|>": 101385, "<|IMAGE_UNUSE:82|>": 101386, "<|IMAGE_UNUSE:83|>": 101387, "<|IMAGE_UNUSE:84|>": 101388, "<|IMAGE_UNUSE:85|>": 101389, "<|IMAGE_UNUSE:86|>": 101390, "<|IMAGE_UNUSE:87|>": 101391, "<|IMAGE_UNUSE:88|>": 101392, "<|IMAGE_UNUSE:89|>": 101393, "<|IMAGE_UNUSE:90|>": 101394, "<|IMAGE_UNUSE:91|>": 101395, "<|IMAGE_UNUSE:92|>": 101396, "<|IMAGE_UNUSE:93|>": 101397, "<|IMAGE_UNUSE:94|>": 101398, "<|IMAGE_UNUSE:95|>": 101399, "<|IMAGE_UNUSE:96|>": 101400, "<|IMAGE_UNUSE:97|>": 101401, "<|IMAGE_UNUSE:98|>": 101402, "<|IMAGE_UNUSE:99|>": 101403, "<|IMAGE_UNUSE:100|>": 101404, "<|IMAGE_UNUSE:101|>": 101405, "<|IMAGE_UNUSE:102|>": 101406, "<|IMAGE_UNUSE:103|>": 101407, "<|IMAGE_UNUSE:104|>": 101408, "<|IMAGE_UNUSE:105|>": 101409, "<|IMAGE_UNUSE:106|>": 101410, "<|IMAGE_UNUSE:107|>": 101411, "<|IMAGE_UNUSE:108|>": 101412, "<|IMAGE_UNUSE:109|>": 101413, "<|IMAGE_UNUSE:110|>": 101414, "<|IMAGE_UNUSE:111|>": 101415, "<|IMAGE_UNUSE:112|>": 101416, "<|IMAGE_UNUSE:113|>": 101417, "<|IMAGE_UNUSE:114|>": 101418, "<|IMAGE_UNUSE:115|>": 101419, "<|IMAGE_UNUSE:116|>": 101420, "<|IMAGE_UNUSE:117|>": 101421, "<|IMAGE_UNUSE:118|>": 101422, "<|IMAGE_UNUSE:119|>": 101423, "<|IMAGE_UNUSE:120|>": 101424, "<|IMAGE_UNUSE:121|>": 101425, "<|IMAGE_UNUSE:122|>": 101426, "<|IMAGE_UNUSE:123|>": 101427, "<|IMAGE_UNUSE:124|>": 101428, "<|IMAGE_UNUSE:125|>": 101429, "<|IMAGE_UNUSE:126|>": 101430, "<|IMAGE_UNUSE:127|>": 101431, "<|IMAGE_UNUSE:128|>": 101432, "<|IMAGE_UNUSE:129|>": 101433, "<|IMAGE_UNUSE:130|>": 101434, "<|IMAGE_UNUSE:131|>": 101435, "<|IMAGE_UNUSE:132|>": 101436, "<|IMAGE_UNUSE:133|>": 101437, "<|IMAGE_UNUSE:134|>": 101438, "<|IMAGE_UNUSE:135|>": 101439, "<|IMAGE_UNUSE:136|>": 101440, "<|IMAGE_UNUSE:137|>": 101441, "<|IMAGE_UNUSE:138|>": 101442, 
"<|IMAGE_UNUSE:139|>": 101443, "<|IMAGE_UNUSE:140|>": 101444, "<|IMAGE_UNUSE:141|>": 101445, "<|IMAGE_UNUSE:142|>": 101446, "<|IMAGE_UNUSE:143|>": 101447, "<|IMAGE_UNUSE:144|>": 101448, "<|IMAGE_UNUSE:145|>": 101449, "<|IMAGE_UNUSE:146|>": 101450, "<|IMAGE_UNUSE:147|>": 101451, "<|IMAGE_UNUSE:148|>": 101452, "<|IMAGE_UNUSE:149|>": 101453, "<|IMAGE_UNUSE:150|>": 101454, "<|IMAGE_UNUSE:151|>": 101455, "<|IMAGE_UNUSE:152|>": 101456, "<|IMAGE_UNUSE:153|>": 101457, "<|IMAGE_UNUSE:154|>": 101458, "<|IMAGE_UNUSE:155|>": 101459, "<|IMAGE_UNUSE:156|>": 101460, "<|IMAGE_UNUSE:157|>": 101461, "<|IMAGE_UNUSE:158|>": 101462, "<|IMAGE_UNUSE:159|>": 101463, "<|IMAGE_UNUSE:160|>": 101464, "<|IMAGE_UNUSE:161|>": 101465, "<|IMAGE_UNUSE:162|>": 101466, "<|IMAGE_UNUSE:163|>": 101467, "<|IMAGE_UNUSE:164|>": 101468, "<|IMAGE_UNUSE:165|>": 101469, "<|IMAGE_UNUSE:166|>": 101470, "<|IMAGE_UNUSE:167|>": 101471, "<|IMAGE_UNUSE:168|>": 101472, "<|IMAGE_UNUSE:169|>": 101473, "<|IMAGE_UNUSE:170|>": 101474, "<|IMAGE_UNUSE:171|>": 101475, "<|IMAGE_UNUSE:172|>": 101476, "<|IMAGE_UNUSE:173|>": 101477, "<|IMAGE_UNUSE:174|>": 101478, "<|IMAGE_UNUSE:175|>": 101479, "<|IMAGE_UNUSE:176|>": 101480, "<|IMAGE_UNUSE:177|>": 101481, "<|IMAGE_UNUSE:178|>": 101482, "<|IMAGE_UNUSE:179|>": 101483, "<|IMAGE_UNUSE:180|>": 101484, "<|IMAGE_UNUSE:181|>": 101485, "<|IMAGE_UNUSE:182|>": 101486, "<|IMAGE_UNUSE:183|>": 101487, "<|IMAGE_UNUSE:184|>": 101488, "<|IMAGE_UNUSE:185|>": 101489, "<|IMAGE_UNUSE:186|>": 101490, "<|IMAGE_UNUSE:187|>": 101491, "<|IMAGE_UNUSE:188|>": 101492, "<|IMAGE_UNUSE:189|>": 101493, "<|IMAGE_UNUSE:190|>": 101494, "<|IMAGE_UNUSE:191|>": 101495, "<|IMAGE_UNUSE:192|>": 101496, "<|IMAGE_UNUSE:193|>": 101497, "<|IMAGE_UNUSE:194|>": 101498, "<|IMAGE_UNUSE:195|>": 101499, "<|IMAGE_UNUSE:196|>": 101500, "<|IMAGE_UNUSE:197|>": 101501, "<|IMAGE_UNUSE:198|>": 101502, "<|IMAGE_UNUSE:199|>": 101503, "<|IMAGE_UNUSE:200|>": 101504, "<|IMAGE_UNUSE:201|>": 101505, "<|IMAGE_UNUSE:202|>": 101506, 
"<|IMAGE_UNUSE:203|>": 101507, "<|IMAGE_UNUSE:204|>": 101508, "<|IMAGE_UNUSE:205|>": 101509, "<|IMAGE_UNUSE:206|>": 101510, "<|IMAGE_UNUSE:207|>": 101511, "<|IMAGE_UNUSE:208|>": 101512, "<|IMAGE_UNUSE:209|>": 101513, "<|IMAGE_UNUSE:210|>": 101514, "<|IMAGE_UNUSE:211|>": 101515, "<|IMAGE_UNUSE:212|>": 101516, "<|IMAGE_UNUSE:213|>": 101517, "<|IMAGE_UNUSE:214|>": 101518, "<|IMAGE_UNUSE:215|>": 101519, "<|IMAGE_UNUSE:216|>": 101520, "<|IMAGE_UNUSE:217|>": 101521, "<|IMAGE_UNUSE:218|>": 101522, "<|IMAGE_UNUSE:219|>": 101523, "<|IMAGE_UNUSE:220|>": 101524, "<|IMAGE_UNUSE:221|>": 101525, "<|IMAGE_UNUSE:222|>": 101526, "<|IMAGE_UNUSE:223|>": 101527, "<|IMAGE_UNUSE:224|>": 101528, "<|IMAGE_UNUSE:225|>": 101529, "<|IMAGE_UNUSE:226|>": 101530, "<|IMAGE_UNUSE:227|>": 101531, "<|IMAGE_UNUSE:228|>": 101532, "<|IMAGE_UNUSE:229|>": 101533, "<|IMAGE_UNUSE:230|>": 101534, "<|IMAGE_UNUSE:231|>": 101535, "<|IMAGE_UNUSE:232|>": 101536, "<|IMAGE_UNUSE:233|>": 101537, "<|IMAGE_UNUSE:234|>": 101538, "<|IMAGE_UNUSE:235|>": 101539, "<|IMAGE_UNUSE:236|>": 101540, "<|IMAGE_UNUSE:237|>": 101541, "<|IMAGE_UNUSE:238|>": 101542, "<|IMAGE_UNUSE:239|>": 101543, "<|IMAGE_UNUSE:240|>": 101544, "<|IMAGE_UNUSE:241|>": 101545, "<|IMAGE_UNUSE:242|>": 101546, "<|IMAGE_UNUSE:243|>": 101547, "<|IMAGE_UNUSE:244|>": 101548, "<|IMAGE_UNUSE:245|>": 101549, "<|IMAGE_UNUSE:246|>": 101550, "<|IMAGE_UNUSE:247|>": 101551, "<|IMAGE_UNUSE:248|>": 101552, "<|IMAGE_UNUSE:249|>": 101553, "<|IMAGE_UNUSE:250|>": 101554, "<|IMAGE_UNUSE:251|>": 101555, "<|IMAGE_UNUSE:252|>": 101556, "<|IMAGE_UNUSE:253|>": 101557, "<|IMAGE_UNUSE:254|>": 101558, "<|IMAGE_UNUSE:255|>": 101559, "<|IMAGE_UNUSE:256|>": 101560, "<|IMAGE_UNUSE:257|>": 101561, "<|IMAGE_UNUSE:258|>": 101562, "<|IMAGE_UNUSE:259|>": 101563, "<|IMAGE_UNUSE:260|>": 101564, "<|IMAGE_UNUSE:261|>": 101565, "<|IMAGE_UNUSE:262|>": 101566, "<|IMAGE_UNUSE:263|>": 101567, "<|IMAGE_UNUSE:264|>": 101568, "<|IMAGE_UNUSE:265|>": 101569, "<|IMAGE_UNUSE:266|>": 101570, 
"<|IMAGE_UNUSE:267|>": 101571, "<|IMAGE_UNUSE:268|>": 101572, "<|IMAGE_UNUSE:269|>": 101573, "<|IMAGE_UNUSE:270|>": 101574, "<|IMAGE_UNUSE:271|>": 101575, "<|IMAGE_UNUSE:272|>": 101576, "<|IMAGE_UNUSE:273|>": 101577, "<|IMAGE_UNUSE:274|>": 101578, "<|IMAGE_UNUSE:275|>": 101579, "<|IMAGE_UNUSE:276|>": 101580, "<|IMAGE_UNUSE:277|>": 101581, "<|IMAGE_UNUSE:278|>": 101582, "<|IMAGE_UNUSE:279|>": 101583, "<|IMAGE_UNUSE:280|>": 101584, "<|IMAGE_UNUSE:281|>": 101585, "<|IMAGE_UNUSE:282|>": 101586, "<|IMAGE_UNUSE:283|>": 101587, "<|IMAGE_UNUSE:284|>": 101588, "<|IMAGE_UNUSE:285|>": 101589, "<|IMAGE_UNUSE:286|>": 101590, "<|IMAGE_UNUSE:287|>": 101591, "<|IMAGE_UNUSE:288|>": 101592, "<|IMAGE_UNUSE:289|>": 101593, "<|IMAGE_UNUSE:290|>": 101594, "<|IMAGE_UNUSE:291|>": 101595, "<|IMAGE_UNUSE:292|>": 101596, "<|IMAGE_UNUSE:293|>": 101597, "<|IMAGE_UNUSE:294|>": 101598, "<|IMAGE_UNUSE:295|>": 101599, "<|IMAGE_UNUSE:296|>": 101600, "<|IMAGE_UNUSE:297|>": 101601, "<|IMAGE_UNUSE:298|>": 101602, "<|IMAGE_UNUSE:299|>": 101603, "<|IMAGE_UNUSE:300|>": 101604, "<|IMAGE_UNUSE:301|>": 101605, "<|IMAGE_UNUSE:302|>": 101606, "<|IMAGE_UNUSE:303|>": 101607, "<|IMAGE_UNUSE:304|>": 101608, "<|IMAGE_UNUSE:305|>": 101609, "<|IMAGE_UNUSE:306|>": 101610, "<|IMAGE_UNUSE:307|>": 101611, "<|IMAGE_UNUSE:308|>": 101612, "<|IMAGE_UNUSE:309|>": 101613, "<|IMAGE_UNUSE:310|>": 101614, "<|IMAGE_UNUSE:311|>": 101615, "<|IMAGE_UNUSE:312|>": 101616, "<|IMAGE_UNUSE:313|>": 101617, "<|IMAGE_UNUSE:314|>": 101618, "<|IMAGE_UNUSE:315|>": 101619, "<|IMAGE_UNUSE:316|>": 101620, "<|IMAGE_UNUSE:317|>": 101621, "<|IMAGE_UNUSE:318|>": 101622, "<|IMAGE_UNUSE:319|>": 101623, "<|IMAGE_UNUSE:320|>": 101624, "<|IMAGE_UNUSE:321|>": 101625, "<|IMAGE_UNUSE:322|>": 101626, "<|IMAGE_UNUSE:323|>": 101627, "<|IMAGE_UNUSE:324|>": 101628, "<|IMAGE_UNUSE:325|>": 101629, "<|IMAGE_UNUSE:326|>": 101630, "<|IMAGE_UNUSE:327|>": 101631, "<|IMAGE_UNUSE:328|>": 101632, "<|IMAGE_UNUSE:329|>": 101633, "<|IMAGE_UNUSE:330|>": 101634, 
"<|IMAGE_UNUSE:331|>": 101635, "<|IMAGE_UNUSE:332|>": 101636, "<|IMAGE_UNUSE:333|>": 101637, "<|IMAGE_UNUSE:334|>": 101638, "<|IMAGE_UNUSE:335|>": 101639, "<|IMAGE_UNUSE:336|>": 101640, "<|IMAGE_UNUSE:337|>": 101641, "<|IMAGE_UNUSE:338|>": 101642, "<|IMAGE_UNUSE:339|>": 101643, "<|IMAGE_UNUSE:340|>": 101644, "<|IMAGE_UNUSE:341|>": 101645, "<|IMAGE_UNUSE:342|>": 101646, "<|IMAGE_UNUSE:343|>": 101647, "<|IMAGE_UNUSE:344|>": 101648, "<|IMAGE_UNUSE:345|>": 101649, "<|IMAGE_UNUSE:346|>": 101650, "<|IMAGE_UNUSE:347|>": 101651, "<|IMAGE_UNUSE:348|>": 101652, "<|IMAGE_UNUSE:349|>": 101653, "<|IMAGE_UNUSE:350|>": 101654, "<|IMAGE_UNUSE:351|>": 101655, "<|IMAGE_UNUSE:352|>": 101656, "<|IMAGE_UNUSE:353|>": 101657, "<|IMAGE_UNUSE:354|>": 101658, "<|IMAGE_UNUSE:355|>": 101659, "<|IMAGE_UNUSE:356|>": 101660, "<|IMAGE_UNUSE:357|>": 101661, "<|IMAGE_UNUSE:358|>": 101662, "<|IMAGE_UNUSE:359|>": 101663, "<|IMAGE_UNUSE:360|>": 101664, "<|IMAGE_UNUSE:361|>": 101665, "<|IMAGE_UNUSE:362|>": 101666, "<|IMAGE_UNUSE:363|>": 101667, "<|IMAGE_UNUSE:364|>": 101668, "<|IMAGE_UNUSE:365|>": 101669, "<|IMAGE_UNUSE:366|>": 101670, "<|IMAGE_UNUSE:367|>": 101671, "<|IMAGE_UNUSE:368|>": 101672, "<|IMAGE_UNUSE:369|>": 101673, "<|IMAGE_UNUSE:370|>": 101674, "<|IMAGE_UNUSE:371|>": 101675, "<|IMAGE_UNUSE:372|>": 101676, "<|IMAGE_UNUSE:373|>": 101677, "<|IMAGE_UNUSE:374|>": 101678, "<|IMAGE_UNUSE:375|>": 101679, "<|IMAGE_UNUSE:376|>": 101680, "<|IMAGE_UNUSE:377|>": 101681, "<|IMAGE_UNUSE:378|>": 101682, "<|IMAGE_UNUSE:379|>": 101683, "<|IMAGE_UNUSE:380|>": 101684, "<|IMAGE_UNUSE:381|>": 101685, "<|IMAGE_UNUSE:382|>": 101686, "<|IMAGE_UNUSE:383|>": 101687, "<|IMAGE_UNUSE:384|>": 101688, "<|IMAGE_UNUSE:385|>": 101689, "<|IMAGE_UNUSE:386|>": 101690, "<|IMAGE_UNUSE:387|>": 101691, "<|IMAGE_UNUSE:388|>": 101692, "<|IMAGE_UNUSE:389|>": 101693, "<|IMAGE_UNUSE:390|>": 101694, "<|IMAGE_UNUSE:391|>": 101695, "<|IMAGE_UNUSE:392|>": 101696, "<|IMAGE_UNUSE:393|>": 101697, "<|IMAGE_UNUSE:394|>": 101698, 
"<|IMAGE_UNUSE:395|>": 101699, "<|IMAGE_UNUSE:396|>": 101700, "<|IMAGE_UNUSE:397|>": 101701, "<|IMAGE_UNUSE:398|>": 101702, "<|IMAGE_UNUSE:399|>": 101703, "<|IMAGE_UNUSE:400|>": 101704, "<|IMAGE_UNUSE:401|>": 101705, "<|IMAGE_UNUSE:402|>": 101706, "<|IMAGE_UNUSE:403|>": 101707, "<|IMAGE_UNUSE:404|>": 101708, "<|IMAGE_UNUSE:405|>": 101709, "<|IMAGE_UNUSE:406|>": 101710, "<|IMAGE_UNUSE:407|>": 101711, "<|IMAGE_UNUSE:408|>": 101712, "<|IMAGE_UNUSE:409|>": 101713, "<|IMAGE_UNUSE:410|>": 101714, "<|IMAGE_UNUSE:411|>": 101715, "<|IMAGE_UNUSE:412|>": 101716, "<|IMAGE_UNUSE:413|>": 101717, "<|IMAGE_UNUSE:414|>": 101718, "<|IMAGE_UNUSE:415|>": 101719, "<|IMAGE_UNUSE:416|>": 101720, "<|IMAGE_UNUSE:417|>": 101721, "<|IMAGE_UNUSE:418|>": 101722, "<|IMAGE_UNUSE:419|>": 101723, "<|IMAGE_UNUSE:420|>": 101724, "<|IMAGE_UNUSE:421|>": 101725, "<|IMAGE_UNUSE:422|>": 101726, "<|IMAGE_UNUSE:423|>": 101727, "<|IMAGE_UNUSE:424|>": 101728, "<|IMAGE_UNUSE:425|>": 101729, "<|IMAGE_UNUSE:426|>": 101730, "<|IMAGE_UNUSE:427|>": 101731, "<|IMAGE_UNUSE:428|>": 101732, "<|IMAGE_UNUSE:429|>": 101733, "<|IMAGE_UNUSE:430|>": 101734, "<|IMAGE_UNUSE:431|>": 101735, "<|IMAGE_UNUSE:432|>": 101736, "<|IMAGE_UNUSE:433|>": 101737, "<|IMAGE_UNUSE:434|>": 101738, "<|IMAGE_UNUSE:435|>": 101739, "<|IMAGE_UNUSE:436|>": 101740, "<|IMAGE_UNUSE:437|>": 101741, "<|IMAGE_UNUSE:438|>": 101742, "<|IMAGE_UNUSE:439|>": 101743, "<|IMAGE_UNUSE:440|>": 101744, "<|IMAGE_UNUSE:441|>": 101745, "<|IMAGE_UNUSE:442|>": 101746, "<|IMAGE_UNUSE:443|>": 101747, "<|IMAGE_UNUSE:444|>": 101748, "<|IMAGE_UNUSE:445|>": 101749, "<|IMAGE_UNUSE:446|>": 101750, "<|IMAGE_UNUSE:447|>": 101751, "<|IMAGE_UNUSE:448|>": 101752, "<|IMAGE_UNUSE:449|>": 101753, "<|IMAGE_UNUSE:450|>": 101754, "<|IMAGE_UNUSE:451|>": 101755, "<|IMAGE_UNUSE:452|>": 101756, "<|IMAGE_UNUSE:453|>": 101757, "<|IMAGE_UNUSE:454|>": 101758, "<|IMAGE_UNUSE:455|>": 101759, "<|IMAGE_UNUSE:456|>": 101760, "<|IMAGE_UNUSE:457|>": 101761, "<|IMAGE_UNUSE:458|>": 101762, 
"<|IMAGE_UNUSE:459|>": 101763, "<|IMAGE_UNUSE:460|>": 101764, "<|IMAGE_UNUSE:461|>": 101765, "<|IMAGE_UNUSE:462|>": 101766, "<|IMAGE_UNUSE:463|>": 101767, "<|IMAGE_UNUSE:464|>": 101768, "<|IMAGE_UNUSE:465|>": 101769, "<|IMAGE_UNUSE:466|>": 101770, "<|IMAGE_UNUSE:467|>": 101771, "<|IMAGE_UNUSE:468|>": 101772, "<|IMAGE_UNUSE:469|>": 101773, "<|IMAGE_UNUSE:470|>": 101774, "<|IMAGE_UNUSE:471|>": 101775, "<|IMAGE_UNUSE:472|>": 101776, "<|IMAGE_UNUSE:473|>": 101777, "<|IMAGE_UNUSE:474|>": 101778, "<|IMAGE_UNUSE:475|>": 101779, "<|IMAGE_UNUSE:476|>": 101780, "<|IMAGE_UNUSE:477|>": 101781, "<|IMAGE_UNUSE:478|>": 101782, "<|IMAGE_UNUSE:479|>": 101783, "<|IMAGE_UNUSE:480|>": 101784, "<|IMAGE_UNUSE:481|>": 101785, "<|IMAGE_UNUSE:482|>": 101786, "<|IMAGE_UNUSE:483|>": 101787, "<|IMAGE_UNUSE:484|>": 101788, "<|IMAGE_UNUSE:485|>": 101789, "<|IMAGE_UNUSE:486|>": 101790, "<|IMAGE_UNUSE:487|>": 101791, "<|IMAGE_UNUSE:488|>": 101792, "<|IMAGE_UNUSE:489|>": 101793, "<|IMAGE_UNUSE:490|>": 101794, "<|IMAGE_UNUSE:491|>": 101795, "<|IMAGE_UNUSE:492|>": 101796, "<|IMAGE_UNUSE:493|>": 101797, "<|IMAGE_UNUSE:494|>": 101798, "<|IMAGE_UNUSE:495|>": 101799, "<|IMAGE_UNUSE:496|>": 101800, "<|IMAGE_UNUSE:497|>": 101801, "<|IMAGE_UNUSE:498|>": 101802, "<|IMAGE_UNUSE:499|>": 101803, "<|IMAGE_UNUSE:500|>": 101804, "<|IMAGE_UNUSE:501|>": 101805, "<|IMAGE_UNUSE:502|>": 101806, "<|IMAGE_UNUSE:503|>": 101807, "<|IMAGE_UNUSE:504|>": 101808, "<|IMAGE_UNUSE:505|>": 101809, "<|IMAGE_UNUSE:506|>": 101810, "<|IMAGE_UNUSE:507|>": 101811, "<|IMAGE_UNUSE:508|>": 101812, "<|IMAGE_UNUSE:509|>": 101813, "<|IMAGE_UNUSE:510|>": 101814, "<|IMAGE_UNUSE:511|>": 101815, "<|IMAGE_UNUSE:512|>": 101816, "<|IMAGE_UNUSE:513|>": 101817, "<|IMAGE_UNUSE:514|>": 101818, "<|IMAGE_UNUSE:515|>": 101819, "<|IMAGE_UNUSE:516|>": 101820, "<|IMAGE_UNUSE:517|>": 101821, "<|IMAGE_UNUSE:518|>": 101822, "<|IMAGE_UNUSE:519|>": 101823, "<|IMAGE_UNUSE:520|>": 101824, "<|IMAGE_UNUSE:521|>": 101825, "<|IMAGE_UNUSE:522|>": 101826, 
"<|IMAGE_UNUSE:523|>": 101827, "<|IMAGE_UNUSE:524|>": 101828, "<|IMAGE_UNUSE:525|>": 101829, "<|IMAGE_UNUSE:526|>": 101830, "<|IMAGE_UNUSE:527|>": 101831, "<|IMAGE_UNUSE:528|>": 101832, "<|IMAGE_UNUSE:529|>": 101833, "<|IMAGE_UNUSE:530|>": 101834, "<|IMAGE_UNUSE:531|>": 101835, "<|IMAGE_UNUSE:532|>": 101836, "<|IMAGE_UNUSE:533|>": 101837, "<|IMAGE_UNUSE:534|>": 101838, "<|IMAGE_UNUSE:535|>": 101839, "<|IMAGE_UNUSE:536|>": 101840, "<|IMAGE_UNUSE:537|>": 101841, "<|IMAGE_UNUSE:538|>": 101842, "<|IMAGE_UNUSE:539|>": 101843, "<|IMAGE_UNUSE:540|>": 101844, "<|IMAGE_UNUSE:541|>": 101845, "<|IMAGE_UNUSE:542|>": 101846, "<|IMAGE_UNUSE:543|>": 101847, "<|IMAGE_UNUSE:544|>": 101848, "<|IMAGE_UNUSE:545|>": 101849, "<|IMAGE_UNUSE:546|>": 101850, "<|IMAGE_UNUSE:547|>": 101851, "<|IMAGE_UNUSE:548|>": 101852, "<|IMAGE_UNUSE:549|>": 101853, "<|IMAGE_UNUSE:550|>": 101854, "<|IMAGE_UNUSE:551|>": 101855, "<|IMAGE_UNUSE:552|>": 101856, "<|IMAGE_UNUSE:553|>": 101857, "<|IMAGE_UNUSE:554|>": 101858, "<|IMAGE_UNUSE:555|>": 101859, "<|IMAGE_UNUSE:556|>": 101860, "<|IMAGE_UNUSE:557|>": 101861, "<|IMAGE_UNUSE:558|>": 101862, "<|IMAGE_UNUSE:559|>": 101863, "<|IMAGE_UNUSE:560|>": 101864, "<|IMAGE_UNUSE:561|>": 101865, "<|IMAGE_UNUSE:562|>": 101866, "<|IMAGE_UNUSE:563|>": 101867, "<|IMAGE_UNUSE:564|>": 101868, "<|IMAGE_UNUSE:565|>": 101869, "<|IMAGE_UNUSE:566|>": 101870, "<|IMAGE_UNUSE:567|>": 101871, "<|IMAGE_UNUSE:568|>": 101872, "<|IMAGE_UNUSE:569|>": 101873, "<|IMAGE_UNUSE:570|>": 101874, "<|IMAGE_UNUSE:571|>": 101875, "<|IMAGE_UNUSE:572|>": 101876, "<|IMAGE_UNUSE:573|>": 101877, "<|IMAGE_UNUSE:574|>": 101878, "<|IMAGE_UNUSE:575|>": 101879, "<|IMAGE_UNUSE:576|>": 101880, "<|IMAGE_UNUSE:577|>": 101881, "<|IMAGE_UNUSE:578|>": 101882, "<|IMAGE_UNUSE:579|>": 101883, "<|IMAGE_UNUSE:580|>": 101884, "<|IMAGE_UNUSE:581|>": 101885, "<|IMAGE_UNUSE:582|>": 101886, "<|IMAGE_UNUSE:583|>": 101887, "<|IMAGE_UNUSE:584|>": 101888, "<|IMAGE_UNUSE:585|>": 101889, "<|IMAGE_UNUSE:586|>": 101890, 
"<|IMAGE_UNUSE:587|>": 101891, "<|IMAGE_UNUSE:588|>": 101892, "<|IMAGE_UNUSE:589|>": 101893, "<|IMAGE_UNUSE:590|>": 101894, "<|IMAGE_UNUSE:591|>": 101895, "<|IMAGE_UNUSE:592|>": 101896, "<|IMAGE_UNUSE:593|>": 101897, "<|IMAGE_UNUSE:594|>": 101898, "<|IMAGE_UNUSE:595|>": 101899, "<|IMAGE_UNUSE:596|>": 101900, "<|IMAGE_UNUSE:597|>": 101901, "<|IMAGE_UNUSE:598|>": 101902, "<|IMAGE_UNUSE:599|>": 101903, "<|IMAGE_UNUSE:600|>": 101904, "<|IMAGE_UNUSE:601|>": 101905, "<|IMAGE_UNUSE:602|>": 101906, "<|IMAGE_UNUSE:603|>": 101907, "<|IMAGE_UNUSE:604|>": 101908, "<|IMAGE_UNUSE:605|>": 101909, "<|IMAGE_UNUSE:606|>": 101910, "<|IMAGE_UNUSE:607|>": 101911, "<|IMAGE_UNUSE:608|>": 101912, "<|IMAGE_UNUSE:609|>": 101913, "<|IMAGE_UNUSE:610|>": 101914, "<|IMAGE_UNUSE:611|>": 101915, "<|IMAGE_UNUSE:612|>": 101916, "<|IMAGE_UNUSE:613|>": 101917, "<|IMAGE_UNUSE:614|>": 101918, "<|IMAGE_UNUSE:615|>": 101919, "<|IMAGE_UNUSE:616|>": 101920, "<|IMAGE_UNUSE:617|>": 101921, "<|IMAGE_UNUSE:618|>": 101922, "<|IMAGE_UNUSE:619|>": 101923, "<|IMAGE_UNUSE:620|>": 101924, "<|IMAGE_UNUSE:621|>": 101925, "<|IMAGE_UNUSE:622|>": 101926, "<|IMAGE_UNUSE:623|>": 101927, "<|IMAGE_UNUSE:624|>": 101928, "<|IMAGE_UNUSE:625|>": 101929, "<|IMAGE_UNUSE:626|>": 101930, "<|IMAGE_UNUSE:627|>": 101931, "<|IMAGE_UNUSE:628|>": 101932, "<|IMAGE_UNUSE:629|>": 101933, "<|IMAGE_UNUSE:630|>": 101934, "<|IMAGE_UNUSE:631|>": 101935, "<|IMAGE_UNUSE:632|>": 101936, "<|IMAGE_UNUSE:633|>": 101937, "<|IMAGE_UNUSE:634|>": 101938, "<|IMAGE_UNUSE:635|>": 101939, "<|IMAGE_UNUSE:636|>": 101940, "<|IMAGE_UNUSE:637|>": 101941, "<|IMAGE_UNUSE:638|>": 101942, "<|IMAGE_UNUSE:639|>": 101943, "<|IMAGE_UNUSE:640|>": 101944, "<|IMAGE_UNUSE:641|>": 101945, "<|IMAGE_UNUSE:642|>": 101946, "<|IMAGE_UNUSE:643|>": 101947, "<|IMAGE_UNUSE:644|>": 101948, "<|IMAGE_UNUSE:645|>": 101949, "<|IMAGE_UNUSE:646|>": 101950, "<|IMAGE_UNUSE:647|>": 101951, "<|IMAGE_UNUSE:648|>": 101952, "<|IMAGE_UNUSE:649|>": 101953, "<|IMAGE_UNUSE:650|>": 101954, 
"<|IMAGE_UNUSE:651|>": 101955, "<|IMAGE_UNUSE:652|>": 101956, "<|IMAGE_UNUSE:653|>": 101957, "<|IMAGE_UNUSE:654|>": 101958, "<|IMAGE_UNUSE:655|>": 101959, "<|IMAGE_UNUSE:656|>": 101960, "<|IMAGE_UNUSE:657|>": 101961, "<|IMAGE_UNUSE:658|>": 101962, "<|IMAGE_UNUSE:659|>": 101963, "<|IMAGE_UNUSE:660|>": 101964, "<|IMAGE_UNUSE:661|>": 101965, "<|IMAGE_UNUSE:662|>": 101966, "<|IMAGE_UNUSE:663|>": 101967, "<|IMAGE_UNUSE:664|>": 101968, "<|IMAGE_UNUSE:665|>": 101969, "<|IMAGE_UNUSE:666|>": 101970, "<|IMAGE_UNUSE:667|>": 101971, "<|IMAGE_UNUSE:668|>": 101972, "<|IMAGE_UNUSE:669|>": 101973, "<|IMAGE_UNUSE:670|>": 101974, "<|IMAGE_UNUSE:671|>": 101975, "<|IMAGE_UNUSE:672|>": 101976, "<|IMAGE_UNUSE:673|>": 101977, "<|IMAGE_UNUSE:674|>": 101978, "<|IMAGE_UNUSE:675|>": 101979, "<|IMAGE_UNUSE:676|>": 101980, "<|IMAGE_UNUSE:677|>": 101981, "<|IMAGE_UNUSE:678|>": 101982, "<|IMAGE_UNUSE:679|>": 101983, "<|IMAGE_UNUSE:680|>": 101984, "<|IMAGE_UNUSE:681|>": 101985, "<|IMAGE_UNUSE:682|>": 101986, "<|IMAGE_UNUSE:683|>": 101987, "<|IMAGE_UNUSE:684|>": 101988, "<|IMAGE_UNUSE:685|>": 101989, "<|IMAGE_UNUSE:686|>": 101990, "<|IMAGE_UNUSE:687|>": 101991, "<|IMAGE_UNUSE:688|>": 101992, "<|IMAGE_UNUSE:689|>": 101993, "<|IMAGE_UNUSE:690|>": 101994, "<|IMAGE_UNUSE:691|>": 101995, "<|IMAGE_UNUSE:692|>": 101996, "<|IMAGE_UNUSE:693|>": 101997, "<|IMAGE_UNUSE:694|>": 101998, "<|IMAGE_UNUSE:695|>": 101999, "<|IMAGE_UNUSE:696|>": 102000, "<|IMAGE_UNUSE:697|>": 102001, "<|IMAGE_UNUSE:698|>": 102002, "<|IMAGE_UNUSE:699|>": 102003, "<|IMAGE_UNUSE:700|>": 102004, "<|IMAGE_UNUSE:701|>": 102005, "<|IMAGE_UNUSE:702|>": 102006, "<|IMAGE_UNUSE:703|>": 102007, "<|IMAGE_UNUSE:704|>": 102008, "<|IMAGE_UNUSE:705|>": 102009, "<|IMAGE_UNUSE:706|>": 102010, "<|IMAGE_UNUSE:707|>": 102011, "<|IMAGE_UNUSE:708|>": 102012, "<|IMAGE_UNUSE:709|>": 102013, "<|IMAGE_UNUSE:710|>": 102014, "<|IMAGE_UNUSE:711|>": 102015, "<|IMAGE_UNUSE:712|>": 102016, "<|IMAGE_UNUSE:713|>": 102017, "<|IMAGE_UNUSE:714|>": 102018, 
"<|IMAGE_UNUSE:715|>": 102019, "<|IMAGE_UNUSE:716|>": 102020, "<|IMAGE_UNUSE:717|>": 102021, "<|IMAGE_UNUSE:718|>": 102022, "<|IMAGE_UNUSE:719|>": 102023, "<|IMAGE_UNUSE:720|>": 102024, "<|IMAGE_UNUSE:721|>": 102025, "<|IMAGE_UNUSE:722|>": 102026, "<|IMAGE_UNUSE:723|>": 102027, "<|IMAGE_UNUSE:724|>": 102028, "<|IMAGE_UNUSE:725|>": 102029, "<|IMAGE_UNUSE:726|>": 102030, "<|IMAGE_UNUSE:727|>": 102031, "<|IMAGE_UNUSE:728|>": 102032, "<|IMAGE_UNUSE:729|>": 102033, "<|IMAGE_UNUSE:730|>": 102034, "<|IMAGE_UNUSE:731|>": 102035, "<|IMAGE_UNUSE:732|>": 102036, "<|IMAGE_UNUSE:733|>": 102037, "<|IMAGE_UNUSE:734|>": 102038, "<|IMAGE_UNUSE:735|>": 102039, "<|IMAGE_UNUSE:736|>": 102040, "<|IMAGE_UNUSE:737|>": 102041, "<|IMAGE_UNUSE:738|>": 102042, "<|IMAGE_UNUSE:739|>": 102043, "<|IMAGE_UNUSE:740|>": 102044, "<|IMAGE_UNUSE:741|>": 102045, "<|IMAGE_UNUSE:742|>": 102046, "<|IMAGE_UNUSE:743|>": 102047, "<|IMAGE_UNUSE:744|>": 102048, "<|IMAGE_UNUSE:745|>": 102049, "<|IMAGE_UNUSE:746|>": 102050, "<|IMAGE_UNUSE:747|>": 102051, "<|IMAGE_UNUSE:748|>": 102052, "<|IMAGE_UNUSE:749|>": 102053, "<|IMAGE_UNUSE:750|>": 102054, "<|IMAGE_UNUSE:751|>": 102055, "<|IMAGE_UNUSE:752|>": 102056, "<|IMAGE_UNUSE:753|>": 102057, "<|IMAGE_UNUSE:754|>": 102058, "<|IMAGE_UNUSE:755|>": 102059, "<|IMAGE_UNUSE:756|>": 102060, "<|IMAGE_UNUSE:757|>": 102061, "<|IMAGE_UNUSE:758|>": 102062, "<|IMAGE_UNUSE:759|>": 102063, "<|IMAGE_UNUSE:760|>": 102064, "<|IMAGE_UNUSE:761|>": 102065, "<|IMAGE_UNUSE:762|>": 102066, "<|IMAGE_UNUSE:763|>": 102067, "<|IMAGE_UNUSE:764|>": 102068, "<|IMAGE_UNUSE:765|>": 102069, "<|IMAGE_UNUSE:766|>": 102070, "<|IMAGE_UNUSE:767|>": 102071, "<|IMAGE_UNUSE:768|>": 102072, "<|IMAGE_UNUSE:769|>": 102073, "<|IMAGE_UNUSE:770|>": 102074, "<|IMAGE_UNUSE:771|>": 102075, "<|IMAGE_UNUSE:772|>": 102076, "<|IMAGE_UNUSE:773|>": 102077, "<|IMAGE_UNUSE:774|>": 102078, "<|IMAGE_UNUSE:775|>": 102079, "<|IMAGE_UNUSE:776|>": 102080, "<|IMAGE_UNUSE:777|>": 102081, "<|IMAGE_UNUSE:778|>": 102082, 
"<|IMAGE_UNUSE:779|>": 102083, "<|IMAGE_UNUSE:780|>": 102084, "<|IMAGE_UNUSE:781|>": 102085, "<|IMAGE_UNUSE:782|>": 102086, "<|IMAGE_UNUSE:783|>": 102087, "<|IMAGE_UNUSE:784|>": 102088, "<|IMAGE_UNUSE:785|>": 102089, "<|IMAGE_UNUSE:786|>": 102090, "<|IMAGE_UNUSE:787|>": 102091, "<|IMAGE_UNUSE:788|>": 102092, "<|IMAGE_UNUSE:789|>": 102093, "<|IMAGE_UNUSE:790|>": 102094, "<|IMAGE_UNUSE:791|>": 102095, "<|IMAGE_UNUSE:792|>": 102096, "<|IMAGE_UNUSE:793|>": 102097, "<|IMAGE_UNUSE:794|>": 102098, "<|IMAGE_UNUSE:795|>": 102099, "<|IMAGE_UNUSE:796|>": 102100, "<|IMAGE_UNUSE:797|>": 102101, "<|IMAGE_UNUSE:798|>": 102102, "<|IMAGE_UNUSE:799|>": 102103, "<|IMAGE_UNUSE:800|>": 102104, "<|IMAGE_UNUSE:801|>": 102105, "<|IMAGE_UNUSE:802|>": 102106, "<|IMAGE_UNUSE:803|>": 102107, "<|IMAGE_UNUSE:804|>": 102108, "<|IMAGE_UNUSE:805|>": 102109, "<|IMAGE_UNUSE:806|>": 102110, "<|IMAGE_UNUSE:807|>": 102111, "<|IMAGE_UNUSE:808|>": 102112, "<|IMAGE_UNUSE:809|>": 102113, "<|IMAGE_UNUSE:810|>": 102114, "<|IMAGE_UNUSE:811|>": 102115, "<|IMAGE_UNUSE:812|>": 102116, "<|IMAGE_UNUSE:813|>": 102117, "<|IMAGE_UNUSE:814|>": 102118, "<|IMAGE_UNUSE:815|>": 102119, "<|IMAGE_UNUSE:816|>": 102120, "<|IMAGE_UNUSE:817|>": 102121, "<|IMAGE_UNUSE:818|>": 102122, "<|IMAGE_UNUSE:819|>": 102123, "<|IMAGE_UNUSE:820|>": 102124, "<|IMAGE_UNUSE:821|>": 102125, "<|IMAGE_UNUSE:822|>": 102126, "<|IMAGE_UNUSE:823|>": 102127, "<|IMAGE_UNUSE:824|>": 102128, "<|IMAGE_UNUSE:825|>": 102129, "<|IMAGE_UNUSE:826|>": 102130, "<|IMAGE_UNUSE:827|>": 102131, "<|IMAGE_UNUSE:828|>": 102132, "<|IMAGE_UNUSE:829|>": 102133, "<|IMAGE_UNUSE:830|>": 102134, "<|IMAGE_UNUSE:831|>": 102135, "<|IMAGE_UNUSE:832|>": 102136, "<|IMAGE_UNUSE:833|>": 102137, "<|IMAGE_UNUSE:834|>": 102138, "<|IMAGE_UNUSE:835|>": 102139, "<|IMAGE_UNUSE:836|>": 102140, "<|IMAGE_UNUSE:837|>": 102141, "<|IMAGE_UNUSE:838|>": 102142, "<|IMAGE_UNUSE:839|>": 102143, "<|IMAGE_UNUSE:840|>": 102144, "<|IMAGE_UNUSE:841|>": 102145, "<|IMAGE_UNUSE:842|>": 102146, 
"<|IMAGE_UNUSE:843|>": 102147, "<|IMAGE_UNUSE:844|>": 102148, "<|IMAGE_UNUSE:845|>": 102149, "<|IMAGE_UNUSE:846|>": 102150, "<|IMAGE_UNUSE:847|>": 102151, "<|IMAGE_UNUSE:848|>": 102152, "<|IMAGE_UNUSE:849|>": 102153, "<|IMAGE_UNUSE:850|>": 102154, "<|IMAGE_UNUSE:851|>": 102155, "<|IMAGE_UNUSE:852|>": 102156, "<|IMAGE_UNUSE:853|>": 102157, "<|IMAGE_UNUSE:854|>": 102158, "<|IMAGE_UNUSE:855|>": 102159, "<|IMAGE_UNUSE:856|>": 102160, "<|IMAGE_UNUSE:857|>": 102161, "<|IMAGE_UNUSE:858|>": 102162, "<|IMAGE_UNUSE:859|>": 102163, "<|IMAGE_UNUSE:860|>": 102164, "<|IMAGE_UNUSE:861|>": 102165, "<|IMAGE_UNUSE:862|>": 102166, "<|IMAGE_UNUSE:863|>": 102167, "<|IMAGE_UNUSE:864|>": 102168, "<|IMAGE_UNUSE:865|>": 102169, "<|IMAGE_UNUSE:866|>": 102170, "<|IMAGE_UNUSE:867|>": 102171, "<|IMAGE_UNUSE:868|>": 102172, "<|IMAGE_UNUSE:869|>": 102173, "<|IMAGE_UNUSE:870|>": 102174, "<|IMAGE_UNUSE:871|>": 102175, "<|IMAGE_UNUSE:872|>": 102176, "<|IMAGE_UNUSE:873|>": 102177, "<|IMAGE_UNUSE:874|>": 102178, "<|IMAGE_UNUSE:875|>": 102179, "<|IMAGE_UNUSE:876|>": 102180, "<|IMAGE_UNUSE:877|>": 102181, "<|IMAGE_UNUSE:878|>": 102182, "<|IMAGE_UNUSE:879|>": 102183, "<|IMAGE_UNUSE:880|>": 102184, "<|IMAGE_UNUSE:881|>": 102185, "<|IMAGE_UNUSE:882|>": 102186, "<|IMAGE_UNUSE:883|>": 102187, "<|IMAGE_UNUSE:884|>": 102188, "<|IMAGE_UNUSE:885|>": 102189, "<|IMAGE_UNUSE:886|>": 102190, "<|IMAGE_UNUSE:887|>": 102191, "<|IMAGE_UNUSE:888|>": 102192, "<|IMAGE_UNUSE:889|>": 102193, "<|IMAGE_UNUSE:890|>": 102194, "<|IMAGE_UNUSE:891|>": 102195, "<|IMAGE_UNUSE:892|>": 102196, "<|IMAGE_UNUSE:893|>": 102197, "<|IMAGE_UNUSE:894|>": 102198, "<|IMAGE_UNUSE:895|>": 102199, "<|IMAGE_UNUSE:896|>": 102200, "<|IMAGE_UNUSE:897|>": 102201, "<|IMAGE_UNUSE:898|>": 102202, "<|IMAGE_UNUSE:899|>": 102203, "<|IMAGE_UNUSE:900|>": 102204, "<|IMAGE_UNUSE:901|>": 102205, "<|IMAGE_UNUSE:902|>": 102206, "<|IMAGE_UNUSE:903|>": 102207, "<|IMAGE_UNUSE:904|>": 102208, "<|IMAGE_UNUSE:905|>": 102209, "<|IMAGE_UNUSE:906|>": 102210, 
"<|IMAGE_UNUSE:907|>": 102211, "<|IMAGE_UNUSE:908|>": 102212, "<|IMAGE_UNUSE:909|>": 102213, "<|IMAGE_UNUSE:910|>": 102214, "<|IMAGE_UNUSE:911|>": 102215, "<|IMAGE_UNUSE:912|>": 102216, "<|IMAGE_UNUSE:913|>": 102217, "<|IMAGE_UNUSE:914|>": 102218, "<|IMAGE_UNUSE:915|>": 102219, "<|IMAGE_UNUSE:916|>": 102220, "<|IMAGE_UNUSE:917|>": 102221, "<|IMAGE_UNUSE:918|>": 102222, "<|IMAGE_UNUSE:919|>": 102223, "<|IMAGE_UNUSE:920|>": 102224, "<|IMAGE_UNUSE:921|>": 102225, "<|IMAGE_UNUSE:922|>": 102226, "<|IMAGE_UNUSE:923|>": 102227, "<|IMAGE_UNUSE:924|>": 102228, "<|IMAGE_UNUSE:925|>": 102229, "<|IMAGE_UNUSE:926|>": 102230, "<|IMAGE_UNUSE:927|>": 102231, "<|IMAGE_UNUSE:928|>": 102232, "<|IMAGE_UNUSE:929|>": 102233, "<|IMAGE_UNUSE:930|>": 102234, "<|IMAGE_UNUSE:931|>": 102235, "<|IMAGE_UNUSE:932|>": 102236, "<|IMAGE_UNUSE:933|>": 102237, "<|IMAGE_UNUSE:934|>": 102238, "<|IMAGE_UNUSE:935|>": 102239, "<|IMAGE_UNUSE:936|>": 102240, "<|IMAGE_UNUSE:937|>": 102241, "<|IMAGE_UNUSE:938|>": 102242, "<|IMAGE_UNUSE:939|>": 102243, "<|IMAGE_UNUSE:940|>": 102244, "<|IMAGE_UNUSE:941|>": 102245, "<|IMAGE_UNUSE:942|>": 102246, "<|IMAGE_UNUSE:943|>": 102247, "<|IMAGE_UNUSE:944|>": 102248, "<|IMAGE_UNUSE:945|>": 102249, "<|IMAGE_UNUSE:946|>": 102250, "<|IMAGE_UNUSE:947|>": 102251, "<|IMAGE_UNUSE:948|>": 102252, "<|IMAGE_UNUSE:949|>": 102253, "<|IMAGE_UNUSE:950|>": 102254, "<|IMAGE_UNUSE:951|>": 102255, "<|IMAGE_UNUSE:952|>": 102256, "<|IMAGE_UNUSE:953|>": 102257, "<|IMAGE_UNUSE:954|>": 102258, "<|IMAGE_UNUSE:955|>": 102259, "<|IMAGE_UNUSE:956|>": 102260, "<|IMAGE_UNUSE:957|>": 102261, "<|IMAGE_UNUSE:958|>": 102262, "<|IMAGE_UNUSE:959|>": 102263, "<|IMAGE_UNUSE:960|>": 102264, "<|IMAGE_UNUSE:961|>": 102265, "<|IMAGE_UNUSE:962|>": 102266, "<|IMAGE_UNUSE:963|>": 102267, "<|IMAGE_UNUSE:964|>": 102268, "<|IMAGE_UNUSE:965|>": 102269, "<|IMAGE_UNUSE:966|>": 102270, "<|IMAGE_UNUSE:967|>": 102271, "<|IMAGE_UNUSE:968|>": 102272, "<|IMAGE_UNUSE:969|>": 102273, "<|IMAGE_UNUSE:970|>": 102274, 
"<|IMAGE_UNUSE:971|>": 102275, "<|IMAGE_UNUSE:972|>": 102276, "<|IMAGE_UNUSE:973|>": 102277, "<|IMAGE_UNUSE:974|>": 102278, "<|IMAGE_UNUSE:975|>": 102279, "<|IMAGE_UNUSE:976|>": 102280, "<|IMAGE_UNUSE:977|>": 102281, "<|IMAGE_UNUSE:978|>": 102282, "<|IMAGE_UNUSE:979|>": 102283, "<|IMAGE_UNUSE:980|>": 102284, "<|IMAGE_UNUSE:981|>": 102285, "<|IMAGE_UNUSE:982|>": 102286, "<|IMAGE_UNUSE:983|>": 102287, "<|IMAGE_UNUSE:984|>": 102288, "<|IMAGE_UNUSE:985|>": 102289, "<|IMAGE_UNUSE:986|>": 102290, "<|IMAGE_UNUSE:987|>": 102291, "<|IMAGE_UNUSE:988|>": 102292, "<|IMAGE_UNUSE:989|>": 102293, "<|IMAGE_UNUSE:990|>": 102294, "<|IMAGE_UNUSE:991|>": 102295, "<|IMAGE_UNUSE:992|>": 102296, "<|IMAGE_UNUSE:993|>": 102297, "<|IMAGE_UNUSE:994|>": 102298, "<|IMAGE_UNUSE:995|>": 102299, "<|IMAGE_UNUSE:996|>": 102300, "<|IMAGE_UNUSE:997|>": 102301, "<|IMAGE_UNUSE:998|>": 102302, "<|IMAGE_UNUSE:999|>": 102303, "<|IMAGE_UNUSE:1000|>": 102304, "<|IMAGE_UNUSE:1001|>": 102305, "<|IMAGE_UNUSE:1002|>": 102306, "<|IMAGE_UNUSE:1003|>": 102307, "<|IMAGE_UNUSE:1004|>": 102308, "<|IMAGE_UNUSE:1005|>": 102309, "<|IMAGE_UNUSE:1006|>": 102310, "<|IMAGE_UNUSE:1007|>": 102311, "<|IMAGE_UNUSE:1008|>": 102312, "<|IMAGE_UNUSE:1009|>": 102313, "<|IMAGE_UNUSE:1010|>": 102314, "<|IMAGE_UNUSE:1011|>": 102315, "<|IMAGE_UNUSE:1012|>": 102316, "<|IMAGE_UNUSE:1013|>": 102317, "<|IMAGE_UNUSE:1014|>": 102318, "<|IMAGE_UNUSE:1015|>": 102319, "<|IMAGE_UNUSE:1016|>": 102320, "<|IMAGE_UNUSE:1017|>": 102321, "<|IMAGE_UNUSE:1018|>": 102322, "<|IMAGE_UNUSE:1019|>": 102323, "<|IMAGE_UNUSE:1020|>": 102324, "<|IMAGE_UNUSE:1021|>": 102325, "<|IMAGE_UNUSE:1022|>": 102326, "<|IMAGE_UNUSE:1023|>": 102327, "<|IMAGE_UNUSE:1024|>": 102328, "<|IMAGE_UNUSE:1025|>": 102329, "<|IMAGE_UNUSE:1026|>": 102330, "<|IMAGE_UNUSE:1027|>": 102331, "<|IMAGE_UNUSE:1028|>": 102332, "<|IMAGE_UNUSE:1029|>": 102333, "<|IMAGE_UNUSE:1030|>": 102334, "<|IMAGE_UNUSE:1031|>": 102335, "<|IMAGE_UNUSE:1032|>": 102336, "<|IMAGE_UNUSE:1033|>": 102337, 
"<|IMAGE_UNUSE:1034|>": 102338, "<|IMAGE_UNUSE:1035|>": 102339, "<|IMAGE_UNUSE:1036|>": 102340, "<|IMAGE_UNUSE:1037|>": 102341, "<|IMAGE_UNUSE:1038|>": 102342, "<|STREAMING_BEGIN|>": 102343, "<|STREAMING_END|>": 102344, "<|STREAMING_TEXT_END|>": 102345, "<|AUDIO_UNUSE:0|>": 102346, "<|AUDIO_UNUSE:1|>": 102347, "<|AUDIO_UNUSE:2|>": 102348, "<|AUDIO_UNUSE:3|>": 102349, "<|AUDIO_UNUSE:4|>": 102350, "<|AUDIO_UNUSE:5|>": 102351, "<|AUDIO_UNUSE:6|>": 102352, "<|AUDIO_UNUSE:7|>": 102353, "<|AUDIO_UNUSE:8|>": 102354, "<|AUDIO_UNUSE:9|>": 102355, "<|AUDIO_UNUSE:10|>": 102356, "<|AUDIO_UNUSE:11|>": 102357, "<|AUDIO_UNUSE:12|>": 102358, "<|AUDIO_UNUSE:13|>": 102359, "<|AUDIO_UNUSE:14|>": 102360, "<|AUDIO_UNUSE:15|>": 102361, "<|AUDIO_UNUSE:16|>": 102362, "<|AUDIO_UNUSE:17|>": 102363, "<|AUDIO_UNUSE:18|>": 102364, "<|AUDIO_UNUSE:19|>": 102365, "<|AUDIO_UNUSE:20|>": 102366, "<|AUDIO_UNUSE:21|>": 102367, "<|AUDIO_UNUSE:22|>": 102368, "<|AUDIO_UNUSE:23|>": 102369, "<|AUDIO_UNUSE:24|>": 102370, "<|AUDIO_UNUSE:25|>": 102371, "<|AUDIO_UNUSE:26|>": 102372, "<|AUDIO_UNUSE:27|>": 102373, "<|AUDIO_UNUSE:28|>": 102374, "<|AUDIO_UNUSE:29|>": 102375, "<|AUDIO_UNUSE:30|>": 102376, "<|AUDIO_UNUSE:31|>": 102377, "<|AUDIO_UNUSE:32|>": 102378, "<|AUDIO_UNUSE:33|>": 102379, "<|AUDIO_UNUSE:34|>": 102380, "<|AUDIO_UNUSE:35|>": 102381, "<|AUDIO_UNUSE:36|>": 102382, "<|AUDIO_UNUSE:37|>": 102383, "<|AUDIO_UNUSE:38|>": 102384, "<|AUDIO_UNUSE:39|>": 102385, "<|AUDIO_UNUSE:40|>": 102386, "<|AUDIO_UNUSE:41|>": 102387, "<|AUDIO_UNUSE:42|>": 102388, "<|AUDIO_UNUSE:43|>": 102389, "<|AUDIO_UNUSE:44|>": 102390, "<|AUDIO_UNUSE:45|>": 102391, "<|AUDIO_UNUSE:46|>": 102392, "<|AUDIO_UNUSE:47|>": 102393, "<|AUDIO_UNUSE:48|>": 102394, "<|AUDIO_UNUSE:49|>": 102395, "<|AUDIO_UNUSE:50|>": 102396, "<|AUDIO_UNUSE:51|>": 102397, "<|AUDIO_UNUSE:52|>": 102398, "<|AUDIO_UNUSE:53|>": 102399, "<|AUDIO_UNUSE:54|>": 102400, "<|AUDIO_UNUSE:55|>": 102401, "<|AUDIO_UNUSE:56|>": 102402, "<|AUDIO_UNUSE:57|>": 102403, 
"<|AUDIO_UNUSE:58|>": 102404, "<|AUDIO_UNUSE:59|>": 102405, "<|AUDIO_UNUSE:60|>": 102406, "<|AUDIO_UNUSE:61|>": 102407, "<|AUDIO_UNUSE:62|>": 102408, "<|AUDIO_UNUSE:63|>": 102409, "<|AUDIO_UNUSE:64|>": 102410, "<|AUDIO_UNUSE:65|>": 102411, "<|AUDIO_UNUSE:66|>": 102412, "<|AUDIO_UNUSE:67|>": 102413, "<|AUDIO_UNUSE:68|>": 102414, "<|AUDIO_UNUSE:69|>": 102415, "<|AUDIO_UNUSE:70|>": 102416, "<|AUDIO_UNUSE:71|>": 102417, "<|AUDIO_UNUSE:72|>": 102418, "<|AUDIO_UNUSE:73|>": 102419, "<|AUDIO_UNUSE:74|>": 102420, "<|AUDIO_UNUSE:75|>": 102421, "<|AUDIO_UNUSE:76|>": 102422, "<|AUDIO_UNUSE:77|>": 102423, "<|AUDIO_UNUSE:78|>": 102424, "<|AUDIO_UNUSE:79|>": 102425, "<|AUDIO_UNUSE:80|>": 102426, "<|AUDIO_UNUSE:81|>": 102427, "<|AUDIO_UNUSE:82|>": 102428, "<|AUDIO_UNUSE:83|>": 102429, "<|AUDIO_UNUSE:84|>": 102430, "<|AUDIO_UNUSE:85|>": 102431, "<|AUDIO_UNUSE:86|>": 102432, "<|AUDIO_UNUSE:87|>": 102433, "<|AUDIO_UNUSE:88|>": 102434, "<|AUDIO_UNUSE:89|>": 102435, "<|AUDIO_UNUSE:90|>": 102436, "<|AUDIO_UNUSE:91|>": 102437, "<|AUDIO_UNUSE:92|>": 102438, "<|AUDIO_UNUSE:93|>": 102439, "<|AUDIO_UNUSE:94|>": 102440, "<|AUDIO_UNUSE:95|>": 102441, "<|AUDIO_UNUSE:96|>": 102442, "<|AUDIO_UNUSE:97|>": 102443, "<|AUDIO_UNUSE:98|>": 102444, "<|AUDIO_UNUSE:99|>": 102445, "<|AUDIO_UNUSE:100|>": 102446, "<|AUDIO_UNUSE:101|>": 102447, "<|AUDIO_UNUSE:102|>": 102448, "<|AUDIO_UNUSE:103|>": 102449, "<|AUDIO_UNUSE:104|>": 102450, "<|AUDIO_UNUSE:105|>": 102451, "<|AUDIO_UNUSE:106|>": 102452, "<|AUDIO_UNUSE:107|>": 102453, "<|AUDIO_UNUSE:108|>": 102454, "<|AUDIO_UNUSE:109|>": 102455, "<|AUDIO_UNUSE:110|>": 102456, "<|AUDIO_UNUSE:111|>": 102457, "<|AUDIO_UNUSE:112|>": 102458, "<|AUDIO_UNUSE:113|>": 102459, "<|AUDIO_UNUSE:114|>": 102460, "<|AUDIO_UNUSE:115|>": 102461, "<|AUDIO_UNUSE:116|>": 102462, "<|AUDIO_UNUSE:117|>": 102463, "<|AUDIO_UNUSE:118|>": 102464, "<|AUDIO_UNUSE:119|>": 102465, "<|AUDIO_UNUSE:120|>": 102466, "<|AUDIO_UNUSE:121|>": 102467, "<|AUDIO_UNUSE:122|>": 102468, "<|AUDIO_UNUSE:123|>": 
102469, "<|AUDIO_UNUSE:124|>": 102470, "<|AUDIO_UNUSE:125|>": 102471, "<|AUDIO_UNUSE:126|>": 102472, "<|AUDIO_UNUSE:127|>": 102473, "<|AUDIO_UNUSE:128|>": 102474, "<|AUDIO_UNUSE:129|>": 102475, "<|AUDIO_UNUSE:130|>": 102476, "<|AUDIO_UNUSE:131|>": 102477, "<|AUDIO_UNUSE:132|>": 102478, "<|AUDIO_UNUSE:133|>": 102479, "<|AUDIO_UNUSE:134|>": 102480, "<|AUDIO_UNUSE:135|>": 102481, "<|AUDIO_UNUSE:136|>": 102482, "<|AUDIO_UNUSE:137|>": 102483, "<|AUDIO_UNUSE:138|>": 102484, "<|AUDIO_UNUSE:139|>": 102485, "<|AUDIO_UNUSE:140|>": 102486, "<|AUDIO_UNUSE:141|>": 102487, "<|AUDIO_UNUSE:142|>": 102488, "<|AUDIO_UNUSE:143|>": 102489, "<|AUDIO_UNUSE:144|>": 102490, "<|AUDIO_UNUSE:145|>": 102491, "<|AUDIO_UNUSE:146|>": 102492, "<|AUDIO_UNUSE:147|>": 102493, "<|AUDIO_UNUSE:148|>": 102494, "<|AUDIO_UNUSE:149|>": 102495, "<|AUDIO_UNUSE:150|>": 102496, "<|AUDIO_UNUSE:151|>": 102497, "<|AUDIO_UNUSE:152|>": 102498, "<|AUDIO_UNUSE:153|>": 102499, "<|AUDIO_UNUSE:154|>": 102500, "<|AUDIO_UNUSE:155|>": 102501, "<|AUDIO_UNUSE:156|>": 102502, "<|AUDIO_UNUSE:157|>": 102503, "<|AUDIO_UNUSE:158|>": 102504, "<|AUDIO_UNUSE:159|>": 102505, "<|AUDIO_UNUSE:160|>": 102506, "<|AUDIO_UNUSE:161|>": 102507, "<|AUDIO_UNUSE:162|>": 102508, "<|AUDIO_UNUSE:163|>": 102509, "<|AUDIO_UNUSE:164|>": 102510, "<|AUDIO_UNUSE:165|>": 102511, "<|AUDIO_UNUSE:166|>": 102512, "<|AUDIO_UNUSE:167|>": 102513, "<|AUDIO_UNUSE:168|>": 102514, "<|AUDIO_UNUSE:169|>": 102515, "<|AUDIO_UNUSE:170|>": 102516, "<|AUDIO_UNUSE:171|>": 102517, "<|AUDIO_UNUSE:172|>": 102518, "<|AUDIO_UNUSE:173|>": 102519, "<|AUDIO_UNUSE:174|>": 102520, "<|AUDIO_UNUSE:175|>": 102521, "<|AUDIO_UNUSE:176|>": 102522, "<|AUDIO_UNUSE:177|>": 102523, "<|AUDIO_UNUSE:178|>": 102524, "<|AUDIO_UNUSE:179|>": 102525, "<|AUDIO_UNUSE:180|>": 102526, "<|AUDIO_UNUSE:181|>": 102527, "<|AUDIO_UNUSE:182|>": 102528, "<|AUDIO_UNUSE:183|>": 102529, "<|AUDIO_UNUSE:184|>": 102530, "<|AUDIO_UNUSE:185|>": 102531, "<|AUDIO_UNUSE:186|>": 102532, "<|AUDIO_UNUSE:187|>": 102533, 
"<|AUDIO_UNUSE:188|>": 102534, "<|AUDIO_UNUSE:189|>": 102535, "<|AUDIO_UNUSE:190|>": 102536, "<|AUDIO_UNUSE:191|>": 102537, "<|AUDIO_UNUSE:192|>": 102538, "<|AUDIO_UNUSE:193|>": 102539, "<|AUDIO_UNUSE:194|>": 102540, "<|AUDIO_UNUSE:195|>": 102541, "<|AUDIO_UNUSE:196|>": 102542, "<|AUDIO_UNUSE:197|>": 102543, "<|AUDIO_UNUSE:198|>": 102544, "<|AUDIO_UNUSE:199|>": 102545, "<|AUDIO_UNUSE:200|>": 102546, "<|AUDIO_UNUSE:201|>": 102547, "<|AUDIO_UNUSE:202|>": 102548, "<|AUDIO_UNUSE:203|>": 102549, "<|AUDIO_UNUSE:204|>": 102550, "<|AUDIO_UNUSE:205|>": 102551, "<|AUDIO_UNUSE:206|>": 102552, "<|AUDIO_UNUSE:207|>": 102553, "<|AUDIO_UNUSE:208|>": 102554, "<|AUDIO_UNUSE:209|>": 102555, "<|AUDIO_UNUSE:210|>": 102556, "<|AUDIO_UNUSE:211|>": 102557, "<|AUDIO_UNUSE:212|>": 102558, "<|AUDIO_UNUSE:213|>": 102559, "<|AUDIO_UNUSE:214|>": 102560, "<|AUDIO_UNUSE:215|>": 102561, "<|AUDIO_UNUSE:216|>": 102562, "<|AUDIO_UNUSE:217|>": 102563, "<|AUDIO_UNUSE:218|>": 102564, "<|AUDIO_UNUSE:219|>": 102565, "<|AUDIO_UNUSE:220|>": 102566, "<|AUDIO_UNUSE:221|>": 102567, "<|AUDIO_UNUSE:222|>": 102568, "<|AUDIO_UNUSE:223|>": 102569, "<|AUDIO_UNUSE:224|>": 102570, "<|AUDIO_UNUSE:225|>": 102571, "<|AUDIO_UNUSE:226|>": 102572, "<|AUDIO_UNUSE:227|>": 102573, "<|AUDIO_UNUSE:228|>": 102574, "<|AUDIO_UNUSE:229|>": 102575, "<|AUDIO_UNUSE:230|>": 102576, "<|AUDIO_UNUSE:231|>": 102577, "<|AUDIO_UNUSE:232|>": 102578, "<|AUDIO_UNUSE:233|>": 102579, "<|AUDIO_UNUSE:234|>": 102580, "<|AUDIO_UNUSE:235|>": 102581, "<|AUDIO_UNUSE:236|>": 102582, "<|AUDIO_UNUSE:237|>": 102583, "<|AUDIO_UNUSE:238|>": 102584, "<|AUDIO_UNUSE:239|>": 102585, "<|AUDIO_UNUSE:240|>": 102586, "<|AUDIO_UNUSE:241|>": 102587, "<|AUDIO_UNUSE:242|>": 102588, "<|AUDIO_UNUSE:243|>": 102589, "<|AUDIO_UNUSE:244|>": 102590, "<|AUDIO_UNUSE:245|>": 102591, "<|AUDIO_UNUSE:246|>": 102592, "<|AUDIO_UNUSE:247|>": 102593, "<|AUDIO_UNUSE:248|>": 102594, "<|AUDIO_UNUSE:249|>": 102595, "<|AUDIO_UNUSE:250|>": 102596, "<|AUDIO_UNUSE:251|>": 102597, 
"<|AUDIO_UNUSE:252|>": 102598, "<|AUDIO_UNUSE:253|>": 102599, "<|AUDIO_UNUSE:254|>": 102600, "<|AUDIO_UNUSE:255|>": 102601, "<|AUDIO_UNUSE:256|>": 102602, "<|AUDIO_UNUSE:257|>": 102603, "<|AUDIO_UNUSE:258|>": 102604, "<|AUDIO_UNUSE:259|>": 102605, "<|AUDIO_UNUSE:260|>": 102606, "<|AUDIO_UNUSE:261|>": 102607, "<|AUDIO_UNUSE:262|>": 102608, "<|AUDIO_UNUSE:263|>": 102609, "<|AUDIO_UNUSE:264|>": 102610, "<|AUDIO_UNUSE:265|>": 102611, "<|AUDIO_UNUSE:266|>": 102612, "<|AUDIO_UNUSE:267|>": 102613, "<|AUDIO_UNUSE:268|>": 102614, "<|AUDIO_UNUSE:269|>": 102615, "<|AUDIO_UNUSE:270|>": 102616, "<|AUDIO_UNUSE:271|>": 102617, "<|AUDIO_UNUSE:272|>": 102618, "<|AUDIO_UNUSE:273|>": 102619, "<|AUDIO_UNUSE:274|>": 102620, "<|AUDIO_UNUSE:275|>": 102621, "<|AUDIO_UNUSE:276|>": 102622, "<|AUDIO_UNUSE:277|>": 102623, "<|AUDIO_UNUSE:278|>": 102624, "<|AUDIO_UNUSE:279|>": 102625, "<|AUDIO_UNUSE:280|>": 102626, "<|AUDIO_UNUSE:281|>": 102627, "<|AUDIO_UNUSE:282|>": 102628, "<|AUDIO_UNUSE:283|>": 102629, "<|AUDIO_UNUSE:284|>": 102630, "<|AUDIO_UNUSE:285|>": 102631, "<|AUDIO_UNUSE:286|>": 102632, "<|AUDIO_UNUSE:287|>": 102633, "<|AUDIO_UNUSE:288|>": 102634, "<|AUDIO_UNUSE:289|>": 102635, "<|AUDIO_UNUSE:290|>": 102636, "<|AUDIO_UNUSE:291|>": 102637, "<|AUDIO_UNUSE:292|>": 102638, "<|AUDIO_UNUSE:293|>": 102639, "<|AUDIO_UNUSE:294|>": 102640, "<|AUDIO_UNUSE:295|>": 102641, "<|AUDIO_UNUSE:296|>": 102642, "<|AUDIO_UNUSE:297|>": 102643, "<|AUDIO_UNUSE:298|>": 102644, "<|AUDIO_UNUSE:299|>": 102645, "<|AUDIO_UNUSE:300|>": 102646, "<|AUDIO_UNUSE:301|>": 102647, "<|AUDIO_UNUSE:302|>": 102648, "<|AUDIO_UNUSE:303|>": 102649, "<|AUDIO_UNUSE:304|>": 102650, "<|AUDIO_UNUSE:305|>": 102651, "<|AUDIO_UNUSE:306|>": 102652, "<|AUDIO_UNUSE:307|>": 102653, "<|AUDIO_UNUSE:308|>": 102654, "<|AUDIO_UNUSE:309|>": 102655, "<|AUDIO_UNUSE:310|>": 102656, "<|AUDIO_UNUSE:311|>": 102657, "<|AUDIO_UNUSE:312|>": 102658, "<|AUDIO_UNUSE:313|>": 102659, "<|AUDIO_UNUSE:314|>": 102660, "<|AUDIO_UNUSE:315|>": 102661, 
"<|AUDIO_UNUSE:316|>": 102662, "<|AUDIO_UNUSE:317|>": 102663, "<|AUDIO_UNUSE:318|>": 102664, "<|AUDIO_UNUSE:319|>": 102665, "<|AUDIO_UNUSE:320|>": 102666, "<|AUDIO_UNUSE:321|>": 102667, "<|AUDIO_UNUSE:322|>": 102668, "<|AUDIO_UNUSE:323|>": 102669, "<|AUDIO_UNUSE:324|>": 102670, "<|AUDIO_UNUSE:325|>": 102671, "<|AUDIO_UNUSE:326|>": 102672, "<|AUDIO_UNUSE:327|>": 102673, "<|AUDIO_UNUSE:328|>": 102674, "<|AUDIO_UNUSE:329|>": 102675, "<|AUDIO_UNUSE:330|>": 102676, "<|AUDIO_UNUSE:331|>": 102677, "<|AUDIO_UNUSE:332|>": 102678, "<|AUDIO_UNUSE:333|>": 102679, "<|AUDIO_UNUSE:334|>": 102680, "<|AUDIO_UNUSE:335|>": 102681, "<|AUDIO_UNUSE:336|>": 102682, "<|AUDIO_UNUSE:337|>": 102683, "<|AUDIO_UNUSE:338|>": 102684, "<|AUDIO_UNUSE:339|>": 102685, "<|AUDIO_UNUSE:340|>": 102686, "<|AUDIO_UNUSE:341|>": 102687, "<|AUDIO_UNUSE:342|>": 102688, "<|AUDIO_UNUSE:343|>": 102689, "<|AUDIO_UNUSE:344|>": 102690, "<|AUDIO_UNUSE:345|>": 102691, "<|AUDIO_UNUSE:346|>": 102692, "<|AUDIO_UNUSE:347|>": 102693, "<|AUDIO_UNUSE:348|>": 102694, "<|AUDIO_UNUSE:349|>": 102695, "<|AUDIO_UNUSE:350|>": 102696, "<|AUDIO_UNUSE:351|>": 102697, "<|AUDIO_UNUSE:352|>": 102698, "<|AUDIO_UNUSE:353|>": 102699, "<|AUDIO_UNUSE:354|>": 102700, "<|AUDIO_UNUSE:355|>": 102701, "<|AUDIO_UNUSE:356|>": 102702, "<|AUDIO_UNUSE:357|>": 102703, "<|AUDIO_UNUSE:358|>": 102704, "<|AUDIO_UNUSE:359|>": 102705, "<|AUDIO_UNUSE:360|>": 102706, "<|AUDIO_UNUSE:361|>": 102707, "<|AUDIO_UNUSE:362|>": 102708, "<|AUDIO_UNUSE:363|>": 102709, "<|AUDIO_UNUSE:364|>": 102710, "<|AUDIO_UNUSE:365|>": 102711, "<|AUDIO_UNUSE:366|>": 102712, "<|AUDIO_UNUSE:367|>": 102713, "<|AUDIO_UNUSE:368|>": 102714, "<|AUDIO_UNUSE:369|>": 102715, "<|AUDIO_UNUSE:370|>": 102716, "<|AUDIO_UNUSE:371|>": 102717, "<|AUDIO_UNUSE:372|>": 102718, "<|AUDIO_UNUSE:373|>": 102719, "<|AUDIO_UNUSE:374|>": 102720, "<|AUDIO_UNUSE:375|>": 102721, "<|AUDIO_UNUSE:376|>": 102722, "<|AUDIO_UNUSE:377|>": 102723, "<|AUDIO_UNUSE:378|>": 102724, "<|AUDIO_UNUSE:379|>": 102725, 
"<|AUDIO_UNUSE:380|>": 102726, "<|AUDIO_UNUSE:381|>": 102727, "<|AUDIO_UNUSE:382|>": 102728, "<|AUDIO_UNUSE:383|>": 102729, "<|AUDIO_UNUSE:384|>": 102730, "<|AUDIO_UNUSE:385|>": 102731, "<|AUDIO_UNUSE:386|>": 102732, "<|AUDIO_UNUSE:387|>": 102733, "<|AUDIO_UNUSE:388|>": 102734, "<|AUDIO_UNUSE:389|>": 102735, "<|AUDIO_UNUSE:390|>": 102736, "<|AUDIO_UNUSE:391|>": 102737, "<|AUDIO_UNUSE:392|>": 102738, "<|AUDIO_UNUSE:393|>": 102739, "<|AUDIO_UNUSE:394|>": 102740, "<|AUDIO_UNUSE:395|>": 102741, "<|AUDIO_UNUSE:396|>": 102742, "<|AUDIO_UNUSE:397|>": 102743, "<|AUDIO_UNUSE:398|>": 102744, "<|AUDIO_UNUSE:399|>": 102745, "<|AUDIO_UNUSE:400|>": 102746, "<|AUDIO_UNUSE:401|>": 102747, "<|AUDIO_UNUSE:402|>": 102748, "<|AUDIO_UNUSE:403|>": 102749, "<|AUDIO_UNUSE:404|>": 102750, "<|AUDIO_UNUSE:405|>": 102751, "<|AUDIO_UNUSE:406|>": 102752, "<|AUDIO_UNUSE:407|>": 102753, "<|AUDIO_UNUSE:408|>": 102754, "<|AUDIO_UNUSE:409|>": 102755, "<|AUDIO_UNUSE:410|>": 102756, "<|AUDIO_UNUSE:411|>": 102757, "<|AUDIO_UNUSE:412|>": 102758, "<|AUDIO_UNUSE:413|>": 102759, "<|AUDIO_UNUSE:414|>": 102760, "<|AUDIO_UNUSE:415|>": 102761, "<|AUDIO_UNUSE:416|>": 102762, "<|AUDIO_UNUSE:417|>": 102763, "<|AUDIO_UNUSE:418|>": 102764, "<|AUDIO_UNUSE:419|>": 102765, "<|AUDIO_UNUSE:420|>": 102766, "<|AUDIO_UNUSE:421|>": 102767, "<|AUDIO_UNUSE:422|>": 102768, "<|AUDIO_UNUSE:423|>": 102769, "<|AUDIO_UNUSE:424|>": 102770, "<|AUDIO_UNUSE:425|>": 102771, "<|AUDIO_UNUSE:426|>": 102772, "<|AUDIO_UNUSE:427|>": 102773, "<|AUDIO_UNUSE:428|>": 102774, "<|AUDIO_UNUSE:429|>": 102775, "<|AUDIO_UNUSE:430|>": 102776, "<|AUDIO_UNUSE:431|>": 102777, "<|AUDIO_UNUSE:432|>": 102778, "<|AUDIO_UNUSE:433|>": 102779, "<|AUDIO_UNUSE:434|>": 102780, "<|AUDIO_UNUSE:435|>": 102781, "<|AUDIO_UNUSE:436|>": 102782, "<|AUDIO_UNUSE:437|>": 102783, "<|AUDIO_UNUSE:438|>": 102784, "<|AUDIO_UNUSE:439|>": 102785, "<|AUDIO_UNUSE:440|>": 102786, "<|AUDIO_UNUSE:441|>": 102787, "<|AUDIO_UNUSE:442|>": 102788, "<|AUDIO_UNUSE:443|>": 102789, 
"<|AUDIO_UNUSE:444|>": 102790, "<|AUDIO_UNUSE:445|>": 102791, "<|AUDIO_UNUSE:446|>": 102792, "<|AUDIO_UNUSE:447|>": 102793, "<|AUDIO_UNUSE:448|>": 102794, "<|AUDIO_UNUSE:449|>": 102795, "<|AUDIO_UNUSE:450|>": 102796, "<|AUDIO_UNUSE:451|>": 102797, "<|AUDIO_UNUSE:452|>": 102798, "<|AUDIO_UNUSE:453|>": 102799, "<|AUDIO_UNUSE:454|>": 102800, "<|AUDIO_UNUSE:455|>": 102801, "<|AUDIO_UNUSE:456|>": 102802, "<|AUDIO_UNUSE:457|>": 102803, "<|AUDIO_UNUSE:458|>": 102804, "<|AUDIO_UNUSE:459|>": 102805, "<|AUDIO_UNUSE:460|>": 102806, "<|AUDIO_UNUSE:461|>": 102807, "<|AUDIO_UNUSE:462|>": 102808, "<|AUDIO_UNUSE:463|>": 102809, "<|AUDIO_UNUSE:464|>": 102810, "<|AUDIO_UNUSE:465|>": 102811, "<|AUDIO_UNUSE:466|>": 102812, "<|AUDIO_UNUSE:467|>": 102813, "<|AUDIO_UNUSE:468|>": 102814, "<|AUDIO_UNUSE:469|>": 102815, "<|AUDIO_UNUSE:470|>": 102816, "<|AUDIO_UNUSE:471|>": 102817, "<|AUDIO_UNUSE:472|>": 102818, "<|AUDIO_UNUSE:473|>": 102819, "<|AUDIO_UNUSE:474|>": 102820, "<|AUDIO_UNUSE:475|>": 102821, "<|AUDIO_UNUSE:476|>": 102822, "<|AUDIO_UNUSE:477|>": 102823, "<|AUDIO_UNUSE:478|>": 102824, "<|AUDIO_UNUSE:479|>": 102825, "<|AUDIO_UNUSE:480|>": 102826, "<|AUDIO_UNUSE:481|>": 102827, "<|AUDIO_UNUSE:482|>": 102828, "<|AUDIO_UNUSE:483|>": 102829, "<|AUDIO_UNUSE:484|>": 102830, "<|AUDIO_UNUSE:485|>": 102831, "<|AUDIO_UNUSE:486|>": 102832, "<|AUDIO_UNUSE:487|>": 102833, "<|AUDIO_UNUSE:488|>": 102834, "<|AUDIO_UNUSE:489|>": 102835, "<|AUDIO_UNUSE:490|>": 102836, "<|AUDIO_UNUSE:491|>": 102837, "<|AUDIO_UNUSE:492|>": 102838, "<|AUDIO_UNUSE:493|>": 102839, "<|AUDIO_UNUSE:494|>": 102840, "<|AUDIO_UNUSE:495|>": 102841, "<|AUDIO_UNUSE:496|>": 102842, "<|AUDIO_UNUSE:497|>": 102843, "<|AUDIO_UNUSE:498|>": 102844, "<|AUDIO_UNUSE:499|>": 102845, "<|AUDIO_UNUSE:500|>": 102846, "<|AUDIO_UNUSE:501|>": 102847, "<|AUDIO_UNUSE:502|>": 102848, "<|AUDIO_UNUSE:503|>": 102849, "<|AUDIO_UNUSE:504|>": 102850, "<|AUDIO_UNUSE:505|>": 102851, "<|AUDIO_UNUSE:506|>": 102852, "<|AUDIO_UNUSE:507|>": 102853, 
"<|AUDIO_UNUSE:508|>": 102854, "<|AUDIO_UNUSE:509|>": 102855, "<|AUDIO_UNUSE:510|>": 102856, "<|AUDIO_UNUSE:511|>": 102857, "<|AUDIO_UNUSE:512|>": 102858, "<|AUDIO_UNUSE:513|>": 102859, "<|AUDIO_UNUSE:514|>": 102860, "<|AUDIO_UNUSE:515|>": 102861, "<|AUDIO_UNUSE:516|>": 102862, "<|AUDIO_UNUSE:517|>": 102863, "<|AUDIO_UNUSE:518|>": 102864, "<|AUDIO_UNUSE:519|>": 102865, "<|AUDIO_UNUSE:520|>": 102866, "<|AUDIO_UNUSE:521|>": 102867, "<|AUDIO_UNUSE:522|>": 102868, "<|AUDIO_UNUSE:523|>": 102869, "<|AUDIO_UNUSE:524|>": 102870, "<|AUDIO_UNUSE:525|>": 102871, "<|AUDIO_UNUSE:526|>": 102872, "<|AUDIO_UNUSE:527|>": 102873, "<|AUDIO_UNUSE:528|>": 102874, "<|AUDIO_UNUSE:529|>": 102875, "<|AUDIO_UNUSE:530|>": 102876, "<|AUDIO_UNUSE:531|>": 102877, "<|AUDIO_UNUSE:532|>": 102878, "<|AUDIO_UNUSE:533|>": 102879, "<|AUDIO_UNUSE:534|>": 102880, "<|AUDIO_UNUSE:535|>": 102881, "<|AUDIO_UNUSE:536|>": 102882, "<|AUDIO_UNUSE:537|>": 102883, "<|AUDIO_UNUSE:538|>": 102884, "<|AUDIO_UNUSE:539|>": 102885, "<|AUDIO_UNUSE:540|>": 102886, "<|AUDIO_UNUSE:541|>": 102887, "<|AUDIO_UNUSE:542|>": 102888, "<|AUDIO_UNUSE:543|>": 102889, "<|AUDIO_UNUSE:544|>": 102890, "<|AUDIO_UNUSE:545|>": 102891, "<|AUDIO_UNUSE:546|>": 102892, "<|AUDIO_UNUSE:547|>": 102893, "<|AUDIO_UNUSE:548|>": 102894, "<|AUDIO_UNUSE:549|>": 102895, "<|AUDIO_UNUSE:550|>": 102896, "<|AUDIO_UNUSE:551|>": 102897, "<|AUDIO_UNUSE:552|>": 102898, "<|AUDIO_UNUSE:553|>": 102899, "<|AUDIO_UNUSE:554|>": 102900, "<|AUDIO_UNUSE:555|>": 102901, "<|AUDIO_UNUSE:556|>": 102902, "<|AUDIO_UNUSE:557|>": 102903, "<|AUDIO_UNUSE:558|>": 102904, "<|AUDIO_UNUSE:559|>": 102905, "<|AUDIO_UNUSE:560|>": 102906, "<|AUDIO_UNUSE:561|>": 102907, "<|AUDIO_UNUSE:562|>": 102908, "<|AUDIO_UNUSE:563|>": 102909, "<|AUDIO_UNUSE:564|>": 102910, "<|AUDIO_UNUSE:565|>": 102911, "<|AUDIO_UNUSE:566|>": 102912, "<|AUDIO_UNUSE:567|>": 102913, "<|AUDIO_UNUSE:568|>": 102914, "<|AUDIO_UNUSE:569|>": 102915, "<|AUDIO_UNUSE:570|>": 102916, "<|AUDIO_UNUSE:571|>": 102917, 
"<|AUDIO_UNUSE:572|>": 102918, "<|AUDIO_UNUSE:573|>": 102919, "<|AUDIO_UNUSE:574|>": 102920, "<|AUDIO_UNUSE:575|>": 102921, "<|AUDIO_UNUSE:576|>": 102922, "<|AUDIO_UNUSE:577|>": 102923, "<|AUDIO_UNUSE:578|>": 102924, "<|AUDIO_UNUSE:579|>": 102925, "<|AUDIO_UNUSE:580|>": 102926, "<|AUDIO_UNUSE:581|>": 102927, "<|AUDIO_UNUSE:582|>": 102928, "<|AUDIO_UNUSE:583|>": 102929, "<|AUDIO_UNUSE:584|>": 102930, "<|AUDIO_UNUSE:585|>": 102931, "<|AUDIO_UNUSE:586|>": 102932, "<|AUDIO_UNUSE:587|>": 102933, "<|AUDIO_UNUSE:588|>": 102934, "<|AUDIO_UNUSE:589|>": 102935, "<|AUDIO_UNUSE:590|>": 102936, "<|AUDIO_UNUSE:591|>": 102937, "<|AUDIO_UNUSE:592|>": 102938, "<|AUDIO_UNUSE:593|>": 102939, "<|AUDIO_UNUSE:594|>": 102940, "<|AUDIO_UNUSE:595|>": 102941, "<|AUDIO_UNUSE:596|>": 102942, "<|AUDIO_UNUSE:597|>": 102943, "<|AUDIO_UNUSE:598|>": 102944, "<|AUDIO_UNUSE:599|>": 102945, "<|AUDIO_UNUSE:600|>": 102946, "<|AUDIO_UNUSE:601|>": 102947, "<|AUDIO_UNUSE:602|>": 102948, "<|AUDIO_UNUSE:603|>": 102949, "<|AUDIO_UNUSE:604|>": 102950, "<|AUDIO_UNUSE:605|>": 102951, "<|AUDIO_UNUSE:606|>": 102952, "<|AUDIO_UNUSE:607|>": 102953, "<|AUDIO_UNUSE:608|>": 102954, "<|AUDIO_UNUSE:609|>": 102955, "<|AUDIO_UNUSE:610|>": 102956, "<|AUDIO_UNUSE:611|>": 102957, "<|AUDIO_UNUSE:612|>": 102958, "<|AUDIO_UNUSE:613|>": 102959, "<|AUDIO_UNUSE:614|>": 102960, "<|AUDIO_UNUSE:615|>": 102961, "<|AUDIO_UNUSE:616|>": 102962, "<|AUDIO_UNUSE:617|>": 102963, "<|AUDIO_UNUSE:618|>": 102964, "<|AUDIO_UNUSE:619|>": 102965, "<|AUDIO_UNUSE:620|>": 102966, "<|AUDIO_UNUSE:621|>": 102967, "<|AUDIO_UNUSE:622|>": 102968, "<|AUDIO_UNUSE:623|>": 102969, "<|AUDIO_UNUSE:624|>": 102970, "<|AUDIO_UNUSE:625|>": 102971, "<|AUDIO_UNUSE:626|>": 102972, "<|AUDIO_UNUSE:627|>": 102973, "<|AUDIO_UNUSE:628|>": 102974, "<|AUDIO_UNUSE:629|>": 102975, "<|AUDIO_UNUSE:630|>": 102976, "<|AUDIO_UNUSE:631|>": 102977, "<|AUDIO_UNUSE:632|>": 102978, "<|AUDIO_UNUSE:633|>": 102979, "<|AUDIO_UNUSE:634|>": 102980, "<|AUDIO_UNUSE:635|>": 102981, 
"<|AUDIO_UNUSE:636|>": 102982, "<|AUDIO_UNUSE:637|>": 102983, "<|AUDIO_UNUSE:638|>": 102984, "<|AUDIO_UNUSE:639|>": 102985, "<|AUDIO_UNUSE:640|>": 102986, "<|AUDIO_UNUSE:641|>": 102987, "<|AUDIO_UNUSE:642|>": 102988, "<|AUDIO_UNUSE:643|>": 102989, "<|AUDIO_UNUSE:644|>": 102990, "<|AUDIO_UNUSE:645|>": 102991, "<|AUDIO_UNUSE:646|>": 102992, "<|AUDIO_UNUSE:647|>": 102993, "<|AUDIO_UNUSE:648|>": 102994, "<|AUDIO_UNUSE:649|>": 102995, "<|AUDIO_UNUSE:650|>": 102996, "<|AUDIO_UNUSE:651|>": 102997, "<|AUDIO_UNUSE:652|>": 102998, "<|AUDIO_UNUSE:653|>": 102999, "<|AUDIO_UNUSE:654|>": 103000, "<|AUDIO_UNUSE:655|>": 103001, "<|AUDIO_UNUSE:656|>": 103002, "<|AUDIO_UNUSE:657|>": 103003, "<|AUDIO_UNUSE:658|>": 103004, "<|AUDIO_UNUSE:659|>": 103005, "<|AUDIO_UNUSE:660|>": 103006, "<|AUDIO_UNUSE:661|>": 103007, "<|AUDIO_UNUSE:662|>": 103008, "<|AUDIO_UNUSE:663|>": 103009, "<|AUDIO_UNUSE:664|>": 103010, "<|AUDIO_UNUSE:665|>": 103011, "<|AUDIO_UNUSE:666|>": 103012, "<|AUDIO_UNUSE:667|>": 103013, "<|AUDIO_UNUSE:668|>": 103014, "<|AUDIO_UNUSE:669|>": 103015, "<|AUDIO_UNUSE:670|>": 103016, "<|AUDIO_UNUSE:671|>": 103017, "<|AUDIO_UNUSE:672|>": 103018, "<|AUDIO_UNUSE:673|>": 103019, "<|AUDIO_UNUSE:674|>": 103020, "<|AUDIO_UNUSE:675|>": 103021, "<|AUDIO_UNUSE:676|>": 103022, "<|AUDIO_UNUSE:677|>": 103023, "<|AUDIO_UNUSE:678|>": 103024, "<|AUDIO_UNUSE:679|>": 103025, "<|AUDIO_UNUSE:680|>": 103026, "<|AUDIO_UNUSE:681|>": 103027, "<|AUDIO_UNUSE:682|>": 103028, "<|AUDIO_UNUSE:683|>": 103029, "<|AUDIO_UNUSE:684|>": 103030, "<|AUDIO_UNUSE:685|>": 103031, "<|AUDIO_UNUSE:686|>": 103032, "<|AUDIO_UNUSE:687|>": 103033, "<|AUDIO_UNUSE:688|>": 103034, "<|AUDIO_UNUSE:689|>": 103035, "<|AUDIO_UNUSE:690|>": 103036, "<|AUDIO_UNUSE:691|>": 103037, "<|AUDIO_UNUSE:692|>": 103038, "<|AUDIO_UNUSE:693|>": 103039, "<|AUDIO_UNUSE:694|>": 103040, "<|AUDIO_UNUSE:695|>": 103041, "<|AUDIO_UNUSE:696|>": 103042, "<|AUDIO_UNUSE:697|>": 103043, "<|AUDIO_UNUSE:698|>": 103044, "<|AUDIO_UNUSE:699|>": 103045, 
"<|AUDIO_UNUSE:700|>": 103046, "<|AUDIO_UNUSE:701|>": 103047, "<|AUDIO_UNUSE:702|>": 103048, "<|AUDIO_UNUSE:703|>": 103049, "<|AUDIO_UNUSE:704|>": 103050, "<|AUDIO_UNUSE:705|>": 103051, "<|AUDIO_UNUSE:706|>": 103052, "<|AUDIO_UNUSE:707|>": 103053, "<|AUDIO_UNUSE:708|>": 103054, "<|AUDIO_UNUSE:709|>": 103055, "<|AUDIO_UNUSE:710|>": 103056, "<|AUDIO_UNUSE:711|>": 103057, "<|AUDIO_UNUSE:712|>": 103058, "<|AUDIO_UNUSE:713|>": 103059, "<|AUDIO_UNUSE:714|>": 103060, "<|AUDIO_UNUSE:715|>": 103061, "<|AUDIO_UNUSE:716|>": 103062, "<|AUDIO_UNUSE:717|>": 103063, "<|AUDIO_UNUSE:718|>": 103064, "<|AUDIO_UNUSE:719|>": 103065, "<|AUDIO_UNUSE:720|>": 103066, "<|AUDIO_UNUSE:721|>": 103067, "<|AUDIO_UNUSE:722|>": 103068, "<|AUDIO_UNUSE:723|>": 103069, "<|AUDIO_UNUSE:724|>": 103070, "<|AUDIO_UNUSE:725|>": 103071, "<|AUDIO_UNUSE:726|>": 103072, "<|AUDIO_UNUSE:727|>": 103073, "<|AUDIO_UNUSE:728|>": 103074, "<|AUDIO_UNUSE:729|>": 103075, "<|AUDIO_UNUSE:730|>": 103076, "<|AUDIO_UNUSE:731|>": 103077, "<|AUDIO_UNUSE:732|>": 103078, "<|AUDIO_UNUSE:733|>": 103079, "<|AUDIO_UNUSE:734|>": 103080, "<|AUDIO_UNUSE:735|>": 103081, "<|AUDIO_UNUSE:736|>": 103082, "<|AUDIO_UNUSE:737|>": 103083, "<|AUDIO_UNUSE:738|>": 103084, "<|AUDIO_UNUSE:739|>": 103085, "<|AUDIO_UNUSE:740|>": 103086, "<|AUDIO_UNUSE:741|>": 103087, "<|AUDIO_UNUSE:742|>": 103088, "<|AUDIO_UNUSE:743|>": 103089, "<|AUDIO_UNUSE:744|>": 103090, "<|AUDIO_UNUSE:745|>": 103091, "<|AUDIO_UNUSE:746|>": 103092, "<|AUDIO_UNUSE:747|>": 103093, "<|AUDIO_UNUSE:748|>": 103094, "<|AUDIO_UNUSE:749|>": 103095, "<|AUDIO_UNUSE:750|>": 103096, "<|AUDIO_UNUSE:751|>": 103097, "<|AUDIO_UNUSE:752|>": 103098, "<|AUDIO_UNUSE:753|>": 103099, "<|AUDIO_UNUSE:754|>": 103100, "<|AUDIO_UNUSE:755|>": 103101, "<|AUDIO_UNUSE:756|>": 103102, "<|AUDIO_UNUSE:757|>": 103103, "<|AUDIO_UNUSE:758|>": 103104, "<|AUDIO_UNUSE:759|>": 103105, "<|AUDIO_UNUSE:760|>": 103106, "<|AUDIO_UNUSE:761|>": 103107, "<|AUDIO_UNUSE:762|>": 103108, "<|AUDIO_UNUSE:763|>": 103109, 
"<|AUDIO_UNUSE:764|>": 103110, "<|AUDIO_UNUSE:765|>": 103111, "<|AUDIO_UNUSE:766|>": 103112, "<|AUDIO_UNUSE:767|>": 103113, "<|AUDIO_UNUSE:768|>": 103114, "<|AUDIO_UNUSE:769|>": 103115, "<|AUDIO_UNUSE:770|>": 103116, "<|AUDIO_UNUSE:771|>": 103117, "<|AUDIO_UNUSE:772|>": 103118, "<|AUDIO_UNUSE:773|>": 103119, "<|AUDIO_UNUSE:774|>": 103120, "<|AUDIO_UNUSE:775|>": 103121, "<|AUDIO_UNUSE:776|>": 103122, "<|AUDIO_UNUSE:777|>": 103123, "<|AUDIO_UNUSE:778|>": 103124, "<|AUDIO_UNUSE:779|>": 103125, "<|AUDIO_UNUSE:780|>": 103126, "<|AUDIO_UNUSE:781|>": 103127, "<|AUDIO_UNUSE:782|>": 103128, "<|AUDIO_UNUSE:783|>": 103129, "<|AUDIO_UNUSE:784|>": 103130, "<|AUDIO_UNUSE:785|>": 103131, "<|AUDIO_UNUSE:786|>": 103132, "<|AUDIO_UNUSE:787|>": 103133, "<|AUDIO_UNUSE:788|>": 103134, "<|AUDIO_UNUSE:789|>": 103135, "<|AUDIO_UNUSE:790|>": 103136, "<|AUDIO_UNUSE:791|>": 103137, "<|AUDIO_UNUSE:792|>": 103138, "<|AUDIO_UNUSE:793|>": 103139, "<|AUDIO_UNUSE:794|>": 103140, "<|AUDIO_UNUSE:795|>": 103141, "<|AUDIO_UNUSE:796|>": 103142, "<|AUDIO_UNUSE:797|>": 103143, "<|AUDIO_UNUSE:798|>": 103144, "<|AUDIO_UNUSE:799|>": 103145, "<|AUDIO_UNUSE:800|>": 103146, "<|AUDIO_UNUSE:801|>": 103147, "<|AUDIO_UNUSE:802|>": 103148, "<|AUDIO_UNUSE:803|>": 103149, "<|AUDIO_UNUSE:804|>": 103150, "<|AUDIO_UNUSE:805|>": 103151, "<|AUDIO_UNUSE:806|>": 103152, "<|AUDIO_UNUSE:807|>": 103153, "<|AUDIO_UNUSE:808|>": 103154, "<|AUDIO_UNUSE:809|>": 103155, "<|AUDIO_UNUSE:810|>": 103156, "<|AUDIO_UNUSE:811|>": 103157, "<|AUDIO_UNUSE:812|>": 103158, "<|AUDIO_UNUSE:813|>": 103159, "<|AUDIO_UNUSE:814|>": 103160, "<|AUDIO_UNUSE:815|>": 103161, "<|AUDIO_UNUSE:816|>": 103162, "<|AUDIO_UNUSE:817|>": 103163, "<|AUDIO_UNUSE:818|>": 103164, "<|AUDIO_UNUSE:819|>": 103165, "<|AUDIO_UNUSE:820|>": 103166, "<|AUDIO_UNUSE:821|>": 103167, "<|AUDIO_UNUSE:822|>": 103168, "<|AUDIO_UNUSE:823|>": 103169, "<|AUDIO_UNUSE:824|>": 103170, "<|AUDIO_UNUSE:825|>": 103171, "<|AUDIO_UNUSE:826|>": 103172, "<|AUDIO_UNUSE:827|>": 103173, 
"<|AUDIO_UNUSE:828|>": 103174, "<|AUDIO_UNUSE:829|>": 103175, "<|AUDIO_UNUSE:830|>": 103176, "<|AUDIO_UNUSE:831|>": 103177, "<|AUDIO_UNUSE:832|>": 103178, "<|AUDIO_UNUSE:833|>": 103179, "<|AUDIO_UNUSE:834|>": 103180, "<|AUDIO_UNUSE:835|>": 103181, "<|AUDIO_UNUSE:836|>": 103182, "<|AUDIO_UNUSE:837|>": 103183, "<|AUDIO_UNUSE:838|>": 103184, "<|AUDIO_UNUSE:839|>": 103185, "<|AUDIO_UNUSE:840|>": 103186, "<|AUDIO_UNUSE:841|>": 103187, "<|AUDIO_UNUSE:842|>": 103188, "<|AUDIO_UNUSE:843|>": 103189, "<|AUDIO_UNUSE:844|>": 103190, "<|AUDIO_UNUSE:845|>": 103191, "<|AUDIO_UNUSE:846|>": 103192, "<|AUDIO_UNUSE:847|>": 103193, "<|AUDIO_UNUSE:848|>": 103194, "<|AUDIO_UNUSE:849|>": 103195, "<|AUDIO_UNUSE:850|>": 103196, "<|AUDIO_UNUSE:851|>": 103197, "<|AUDIO_UNUSE:852|>": 103198, "<|AUDIO_UNUSE:853|>": 103199, "<|AUDIO_UNUSE:854|>": 103200, "<|AUDIO_UNUSE:855|>": 103201, "<|AUDIO_UNUSE:856|>": 103202, "<|AUDIO_UNUSE:857|>": 103203, "<|AUDIO_UNUSE:858|>": 103204, "<|AUDIO_UNUSE:859|>": 103205, "<|AUDIO_UNUSE:860|>": 103206, "<|AUDIO_UNUSE:861|>": 103207, "<|AUDIO_UNUSE:862|>": 103208, "<|AUDIO_UNUSE:863|>": 103209, "<|AUDIO_UNUSE:864|>": 103210, "<|AUDIO_UNUSE:865|>": 103211, "<|AUDIO_UNUSE:866|>": 103212, "<|AUDIO_UNUSE:867|>": 103213, "<|AUDIO_UNUSE:868|>": 103214, "<|AUDIO_UNUSE:869|>": 103215, "<|AUDIO_UNUSE:870|>": 103216, "<|AUDIO_UNUSE:871|>": 103217, "<|AUDIO_UNUSE:872|>": 103218, "<|AUDIO_UNUSE:873|>": 103219, "<|AUDIO_UNUSE:874|>": 103220, "<|AUDIO_UNUSE:875|>": 103221, "<|AUDIO_UNUSE:876|>": 103222, "<|AUDIO_UNUSE:877|>": 103223, "<|AUDIO_UNUSE:878|>": 103224, "<|AUDIO_UNUSE:879|>": 103225, "<|AUDIO_UNUSE:880|>": 103226, "<|AUDIO_UNUSE:881|>": 103227, "<|AUDIO_UNUSE:882|>": 103228, "<|AUDIO_UNUSE:883|>": 103229, "<|AUDIO_UNUSE:884|>": 103230, "<|AUDIO_UNUSE:885|>": 103231, "<|AUDIO_UNUSE:886|>": 103232, "<|AUDIO_UNUSE:887|>": 103233, "<|AUDIO_UNUSE:888|>": 103234, "<|AUDIO_UNUSE:889|>": 103235, "<|AUDIO_UNUSE:890|>": 103236, "<|AUDIO_UNUSE:891|>": 103237, 
"<|AUDIO_UNUSE:892|>": 103238, "<|AUDIO_UNUSE:893|>": 103239, "<|AUDIO_UNUSE:894|>": 103240, "<|AUDIO_UNUSE:895|>": 103241, "<|AUDIO_UNUSE:896|>": 103242, "<|AUDIO_UNUSE:897|>": 103243, "<|AUDIO_UNUSE:898|>": 103244, "<|AUDIO_UNUSE:899|>": 103245, "<|AUDIO_UNUSE:900|>": 103246, "<|AUDIO_UNUSE:901|>": 103247, "<|AUDIO_UNUSE:902|>": 103248, "<|AUDIO_UNUSE:903|>": 103249, "<|AUDIO_UNUSE:904|>": 103250, "<|AUDIO_UNUSE:905|>": 103251, "<|AUDIO_UNUSE:906|>": 103252, "<|AUDIO_UNUSE:907|>": 103253, "<|AUDIO_UNUSE:908|>": 103254, "<|AUDIO_UNUSE:909|>": 103255, "<|AUDIO_UNUSE:910|>": 103256, "<|AUDIO_UNUSE:911|>": 103257, "<|AUDIO_UNUSE:912|>": 103258, "<|AUDIO_UNUSE:913|>": 103259, "<|AUDIO_UNUSE:914|>": 103260, "<|AUDIO_UNUSE:915|>": 103261, "<|AUDIO_UNUSE:916|>": 103262, "<|AUDIO_UNUSE:917|>": 103263, "<|AUDIO_UNUSE:918|>": 103264, "<|AUDIO_UNUSE:919|>": 103265, "<|AUDIO_UNUSE:920|>": 103266, "<|AUDIO_UNUSE:921|>": 103267, "<|AUDIO_UNUSE:922|>": 103268, "<|AUDIO_UNUSE:923|>": 103269, "<|AUDIO_UNUSE:924|>": 103270, "<|AUDIO_UNUSE:925|>": 103271, "<|AUDIO_UNUSE:926|>": 103272, "<|AUDIO_UNUSE:927|>": 103273, "<|AUDIO_UNUSE:928|>": 103274, "<|AUDIO_UNUSE:929|>": 103275, "<|AUDIO_UNUSE:930|>": 103276, "<|AUDIO_UNUSE:931|>": 103277, "<|AUDIO_UNUSE:932|>": 103278, "<|AUDIO_UNUSE:933|>": 103279, "<|AUDIO_UNUSE:934|>": 103280, "<|AUDIO_UNUSE:935|>": 103281, "<|AUDIO_UNUSE:936|>": 103282, "<|AUDIO_UNUSE:937|>": 103283, "<|AUDIO_UNUSE:938|>": 103284, "<|AUDIO_UNUSE:939|>": 103285, "<|AUDIO_UNUSE:940|>": 103286, "<|AUDIO_UNUSE:941|>": 103287, "<|AUDIO_UNUSE:942|>": 103288, "<|AUDIO_UNUSE:943|>": 103289, "<|AUDIO_UNUSE:944|>": 103290, "<|AUDIO_UNUSE:945|>": 103291, "<|AUDIO_UNUSE:946|>": 103292, "<|AUDIO_UNUSE:947|>": 103293, "<|AUDIO_UNUSE:948|>": 103294, "<|AUDIO_UNUSE:949|>": 103295, "<|AUDIO_UNUSE:950|>": 103296, "<|AUDIO_UNUSE:951|>": 103297, "<|AUDIO_UNUSE:952|>": 103298, "<|AUDIO_UNUSE:953|>": 103299, "<|AUDIO_UNUSE:954|>": 103300, "<|AUDIO_UNUSE:955|>": 103301, 
"<|AUDIO_UNUSE:956|>": 103302, "<|AUDIO_UNUSE:957|>": 103303, "<|AUDIO_UNUSE:958|>": 103304, "<|AUDIO_UNUSE:959|>": 103305, "<|AUDIO_UNUSE:960|>": 103306, "<|AUDIO_UNUSE:961|>": 103307, "<|AUDIO_UNUSE:962|>": 103308, "<|AUDIO_UNUSE:963|>": 103309, "<|AUDIO_UNUSE:964|>": 103310, "<|AUDIO_UNUSE:965|>": 103311, "<|AUDIO_UNUSE:966|>": 103312, "<|AUDIO_UNUSE:967|>": 103313, "<|AUDIO_UNUSE:968|>": 103314, "<|AUDIO_UNUSE:969|>": 103315, "<|AUDIO_UNUSE:970|>": 103316, "<|AUDIO_UNUSE:971|>": 103317, "<|AUDIO_UNUSE:972|>": 103318, "<|AUDIO_UNUSE:973|>": 103319, "<|AUDIO_UNUSE:974|>": 103320, "<|AUDIO_UNUSE:975|>": 103321, "<|AUDIO_UNUSE:976|>": 103322, "<|AUDIO_UNUSE:977|>": 103323, "<|AUDIO_UNUSE:978|>": 103324, "<|AUDIO_UNUSE:979|>": 103325, "<|AUDIO_UNUSE:980|>": 103326, "<|AUDIO_UNUSE:981|>": 103327, "<|AUDIO_UNUSE:982|>": 103328, "<|AUDIO_UNUSE:983|>": 103329, "<|AUDIO_UNUSE:984|>": 103330, "<|AUDIO_UNUSE:985|>": 103331, "<|AUDIO_UNUSE:986|>": 103332, "<|AUDIO_UNUSE:987|>": 103333, "<|AUDIO_UNUSE:988|>": 103334, "<|AUDIO_UNUSE:989|>": 103335, "<|AUDIO_UNUSE:990|>": 103336, "<|AUDIO_UNUSE:991|>": 103337, "<|AUDIO_UNUSE:992|>": 103338, "<|AUDIO_UNUSE:993|>": 103339, "<|AUDIO_UNUSE:994|>": 103340, "<|AUDIO_UNUSE:995|>": 103341, "<|AUDIO_UNUSE:996|>": 103342, "<|AUDIO_UNUSE:997|>": 103343, "<|AUDIO_UNUSE:998|>": 103344, "<|AUDIO_UNUSE:999|>": 103345, "<|AUDIO_UNUSE:1000|>": 103346, "<|AUDIO_UNUSE:1001|>": 103347, "<|AUDIO_UNUSE:1002|>": 103348, "<|AUDIO_UNUSE:1003|>": 103349, "<|AUDIO_UNUSE:1004|>": 103350, "<|AUDIO_UNUSE:1005|>": 103351, "<|AUDIO_UNUSE:1006|>": 103352, "<|AUDIO_UNUSE:1007|>": 103353, "<|AUDIO_UNUSE:1008|>": 103354, "<|AUDIO_UNUSE:1009|>": 103355, "<|AUDIO_UNUSE:1010|>": 103356, "<|AUDIO_UNUSE:1011|>": 103357, "<|AUDIO_UNUSE:1012|>": 103358, "<|AUDIO_UNUSE:1013|>": 103359, "<|AUDIO_UNUSE:1014|>": 103360, "<|AUDIO_UNUSE:1015|>": 103361, "<|AUDIO_UNUSE:1016|>": 103362, "<|AUDIO_UNUSE:1017|>": 103363, "<|AUDIO_UNUSE:1018|>": 103364, "<|AUDIO_UNUSE:1019|>": 
103365, "<|AUDIO_UNUSE:1020|>": 103366} \ No newline at end of file diff --git a/chat_template.json b/chat_template.json new file mode 100644 index 0000000000000000000000000000000000000000..0d0b145c3679440bc68e39c3d99450f92b782b49 --- /dev/null +++ b/chat_template.json @@ -0,0 +1,3 @@ +{ + "chat_template": "\n{%- set image_count = namespace(value=0) -%}\n{%- set video_count = namespace(value=0) -%}\n{{- '<|begin_of_sentence|>' }}\n{%- for message in messages -%}\n {%- if message.role in ['system', 'user'] -%}\n {%- if message.role == 'user' -%}\n {{- 'User: ' -}}\n {%- endif -%}\n {%- if message.content is string -%}\n {{- message.content -}}\n {%- else -%}\n {%- for content_item in message.content -%}\n {%- if content_item.type == 'text' -%}\n {{- content_item.text -}}\n {%- elif content_item.type == 'image_url' -%}\n {%- set image_count.value = image_count.value + 1 -%}\n Picture {{ image_count.value }}:<|IMAGE_START|><|image@placeholder|><|IMAGE_END|>\n {%- elif content_item.type == 'video_url' -%}\n {%- set video_count.value = video_count.value + 1 -%}\n Video {{ video_count.value }}:<|VIDEO_START|><|video@placeholder|><|VIDEO_END|>\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- if message.role == 'system' -%}\n {{- '\n' -}}\n {%- endif -%}\n {%- elif message.role == 'assistant' -%}\n {%- macro extract_text_content(content_field) -%}\n {%- if content_field is string -%}\n {{- content_field -}}\n {%- elif content_field is iterable and content_field is not string -%}\n {%- set ns = namespace(text_parts=[]) -%}\n {%- set text_parts = [] -%}\n {%- for item in content_field -%}\n {%- if item.type == 'text' -%}\n {%- set ns.text_parts = ns.text_parts + [item.text] -%}\n {%- endif -%}\n {%- endfor -%}\n {{- ns.text_parts | join('') -}}\n {%- else -%}\n {{- '' -}}\n {%- endif -%}\n {%- endmacro -%}\n {%- set reasoning_content = extract_text_content(message.reasoning_content) -%}\n {%- set content = extract_text_content(message.content) -%}\n {%- if '' in 
content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}\n {%- set content = content.split('')[-1].lstrip('\n') %}\n {%- endif %}\n {%- if reasoning_content %}\n {{- '\n' + 'Assistant: ' + '\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }}\n {%- else %}\n {{- '\n' + 'Assistant: ' + content }}\n {%- endif %}\n {{- '<|end_of_sentence|>' }}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt is not defined or add_generation_prompt is true %}\n {{- '\nAssistant: ' -}}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\n\n\n\n' }}\n {%- endif %}\n {%- if enable_thinking is not defined or enable_thinking is true %}\n {{- '' }}\n {%- endif %}\n{%- endif %}\n" +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..70e8af1c334aacc40e4c2c3bec6420576b5c433b --- /dev/null +++ b/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Ernie4_5_VLMoeForConditionalGeneration" + ], + "auto_map": { + "AutoConfig": "configuration_ernie_45t_vl.Ernie4_5_VLMoEConfig", + "AutoModel": "modeling_ernie_45t_vl.Ernie4_5_VLMoeForConditionalGeneration", + "AutoModelForCausalLM": "modeling_ernie_45t_vl.Ernie4_5_VLMoeForConditionalGeneration", + "AutoProcessor": "processing_ernie_45t_vl.Ernie_45T_VLProcessor", + "AutoImageProcessor": "image_processing_ernie_45t_vl.Ernie_45T_VLImageProcessor" + }, + "hidden_act": "silu", + "hidden_size": 8192, + "intermediate_size": 28672, + "im_patch_id": 100295, + "max_position_embeddings": 131072, + "num_attention_heads": 64, + "num_key_value_heads": 8, + "num_hidden_layers": 54, + "pixel_hidden_size": 1280, + "model_type": "ernie4_5_moe_vl", + "rms_norm_eps": 1e-05, + "vocab_size": 103424, + "rope_theta": 500000, + "use_rmsnorm": true, + "use_bias": false, + "moe_layer_start_index": 3, + "moe_capacity": [64,64,64], + "moe_gate": "topk", + "moe_k": 8, + "moe_layer_interval": 
__all__ = [
    "ERNIE_PRETRAINED_INIT_CONFIGURATION",
    "Ernie4_5_Config",
    "Ernie4_5_MoEConfig",
    "Ernie4_5_VLMoEConfig",
]


class DFNRopeVisionTransformerConfig(PretrainedConfig):
    """
    Configuration class for the DFNRopeVisionTransformer model.

    Inherits from [`PretrainedConfig`]; every constructor argument is stored
    unchanged as an attribute of the same name.
    """

    model_type = "DFNRope_vision_transformer"
    base_model_tp_plan = {}

    def __init__(
        self,
        depth=32,
        embed_dim=1280,
        hidden_size=3584,
        hidden_act="quick_gelu",
        mlp_ratio=4,
        num_heads=16,
        in_channels=3,
        patch_size=14,
        spatial_merge_size=2,
        attn_implementation="eager",  # new added
        pp_data_balance=False,
        recompute=False,
        attn_sep=False,
        vit_first_fwd_bsz=128,
        vit_num_recompute_layers=10000,
        **kwargs,
    ):
        """
        Initialize the vision transformer configuration.

        Args:
            depth (int): Number of transformer layers. Defaults to 32.
            embed_dim (int): Dimensionality of the embedding layer. Defaults to 1280.
            hidden_size (int): Hidden dimensionality. Defaults to 3584.
            hidden_act (str): Activation function name. Defaults to "quick_gelu".
            mlp_ratio (float): MLP expansion ratio. Defaults to 4.
            num_heads (int): Number of attention heads per layer. Defaults to 16.
            in_channels (int): Number of channels in the input image. Defaults to 3.
            patch_size (int): Size of patches in the input image. Defaults to 14.
            spatial_merge_size (int): Spatial merge size. Defaults to 2.
            attn_implementation (str): Attention implementation type. Defaults to "eager".
            pp_data_balance (bool): Data-balance switch. Defaults to False.
            recompute (bool): Whether to use recompute. Defaults to False.
            attn_sep (bool): Whether to separate attention computation. Defaults to False.
            vit_first_fwd_bsz (int): First forward batch size for ViT. Defaults to 128.
            vit_num_recompute_layers (int): Number of recomputed layers for ViT.
                Defaults to 10000.
            **kwargs: Forwarded to [`PretrainedConfig`].
        """
        super().__init__(**kwargs)

        self.depth = depth
        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.mlp_ratio = mlp_ratio
        self.num_heads = num_heads
        self.in_channels = in_channels
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.attn_implementation = attn_implementation
        self.pp_data_balance = pp_data_balance
        self.recompute = recompute
        self.attn_sep = attn_sep
        self.vit_first_fwd_bsz = vit_first_fwd_bsz
        self.vit_num_recompute_layers = vit_num_recompute_layers

    def get(self, key, default=None):
        """Return attribute `key`, or `default` when it is not set."""
        return getattr(self, key, default)


ERNIE_PRETRAINED_INIT_CONFIGURATION = {
    "ernie/tiny-random-ernie": {
        "hidden_size": 768,
        "initializer_range": 0.02,
        "intermediate_size": 11008,
        "max_position_embeddings": 2048,
        "model_type": "ernie",
        "num_attention_heads": 2,
        "num_hidden_layers": 2,
        "rms_norm_eps": 1e-06,
        "vocab_size": 32000,
        "bos_token_id": 1,
        "eos_token_id": 2,
        "pad_token_id": 0,
        "use_cache": False,
        "recompute": False,
        "use_flash_attn": True,
        "use_pure_fp16": False,
    },
}


class Ernie4_5_Config(PretrainedConfig):
    """
    Configuration class for the ERNIE model.

    Stores the configuration of an ERNIE model, defining the model architecture.
    Inherits from [`PretrainedConfig`].
    """

    model_type = "ernie"
    pretrained_init_configuration = ERNIE_PRETRAINED_INIT_CONFIGURATION
    base_model_tp_plan = {}

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=768,
        intermediate_size=11008,
        max_position_embeddings=32768,
        num_hidden_layers=2,
        num_attention_heads=2,
        initializer_range=0.02,  # no use
        rms_norm_eps=1e-6,
        use_cache=False,
        use_flash_attention=True,
        use_sparse_flash_attn=True,
        use_var_len_flash_attn=False,
        recompute=False,
        recompute_granularity="core_attn",
        recompute_use_reentrant=False,
        use_rmsnorm=True,
        fuse_rms_norm=False,
        fuse_ln=False,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        fuse_swiglu=False,
        use_bias=False,
        rope_theta=10000,
        fuse_rope=False,
        fuse_softmax_mask=False,
        use_fast_ln=False,
        weight_share_add_bias=True,
        fuse_linear=False,
        max_sequence_length=1024,
        ignored_index=-100,
        add_tail_layers=False,
        use_recompute_lm_head=False,
        use_recompute_loss_fn=False,
        refined_recompute=None,
        attention_probs_dropout_prob=0.0,
        hidden_dropout_prob=0.0,
        compression_ratio: float = 1.0,
        num_key_value_heads=None,
        use_sparse_head_and_loss_fn=False,
        micro_batch_size=-1,
        use_ep_comm_overlap=False,
        use_fused_head_and_loss_fn=False,
        token_balance_loss=False,
        token_balance_seqlen=False,  # calculated based on batchsize and seqlen
        cachekv_quant: bool = False,
        pp_seg_method="layer:ErnieDecoderLayer|EmptyLayer",
        **kwargs,
    ):
        """
        Initialize the ERNIE model configuration.

        Every argument is stored as an attribute of the same name.

        Args:
            vocab_size (int): Size of the vocabulary (number of unique tokens)
            hidden_size (int): Dimensionality of the hidden representations
            intermediate_size (int): Dimensionality of the "intermediate" (feed-forward) layer
            max_position_embeddings (int): Maximum sequence length the model can handle
            num_hidden_layers (int): Number of hidden layers
            num_attention_heads (int): Number of attention heads for each attention layer
            initializer_range (float): Stored for compatibility (marked "no use" upstream)
            rms_norm_eps (float): The epsilon used by the RMS normalization layers
            use_cache (bool): Whether to use caching for faster generation (decoding)
            use_flash_attention (bool): Whether to use FlashAttention
            use_sparse_flash_attn (bool): Whether to use sparse FlashAttention
            use_var_len_flash_attn (bool): Whether to use variable-length FlashAttention
            recompute (bool): Whether to use recomputation to save memory
            recompute_granularity (str): Granularity of recomputation ("core_attn", "full", etc.)
            recompute_use_reentrant (bool): Whether to use reentrant checkpointing
            use_rmsnorm (bool): Whether to use RMSNorm instead of LayerNorm
            fuse_rms_norm (bool): Whether to fuse RMSNorm operations
            fuse_ln (bool): Whether to fuse LayerNorm operations
            pad_token_id (int): Token ID used for padding sequences
            bos_token_id (int): Token ID used for beginning-of-sequence
            eos_token_id (int): Token ID used for end-of-sequence
            fuse_swiglu (bool): Whether to fuse SwiGLU operations
            use_bias (bool): Whether to use bias terms in linear layers
            rope_theta (float): The base period of the RoPE embeddings
            fuse_rope (bool): Whether to fuse RoPE operations
            fuse_softmax_mask (bool): Whether to fuse softmax and mask operations
            use_fast_ln (bool): Whether to use an optimized LayerNorm implementation
            weight_share_add_bias (bool): Whether to add a bias when weights are shared
            fuse_linear (bool): Whether to fuse linear operations
            max_sequence_length (int): Maximum sequence length
            ignored_index (int): Target value that is ignored during loss computation
            add_tail_layers (bool): Whether to add additional layers at the end
            use_recompute_lm_head (bool): Whether to recompute the language model head
            use_recompute_loss_fn (bool): Whether to recompute the loss function
            refined_recompute (dict, optional): Fine-grained recomputation settings.
                `None` (the default) yields a fresh empty dict for each instance.
            attention_probs_dropout_prob (float): Dropout probability for attention weights
            hidden_dropout_prob (float): Dropout probability for hidden layers
            compression_ratio (float): Ratio for KV cache compression (1.0 = no compression)
            num_key_value_heads (int): Number of key/value heads (for Grouped Query Attention)
            use_sparse_head_and_loss_fn (bool): Whether to use sparse head and loss function
            micro_batch_size (int): Size of micro batches (-1 for automatic)
            use_ep_comm_overlap (bool): Whether to overlap communication with computation
            use_fused_head_and_loss_fn (bool): Whether to use fused head and loss function
            token_balance_loss (bool): Whether to balance loss by token count
            token_balance_seqlen (bool): Whether to balance sequence lengths
            cachekv_quant (bool): Whether to quantize key-value cache
            pp_seg_method (str): Method for pipeline parallel segmentation
            **kwargs: Additional keyword arguments passed to parent class
        """

        # Set default for tied embeddings if not specified.
        if "tie_word_embeddings" not in kwargs:
            kwargs["tie_word_embeddings"] = False
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.max_position_embeddings = max_position_embeddings
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.recompute = recompute
        self.recompute_granularity = recompute_granularity
        self.use_flash_attention = use_flash_attention
        self.use_sparse_flash_attn = use_sparse_flash_attn
        self.recompute_use_reentrant = recompute_use_reentrant
        self.use_var_len_flash_attn = use_var_len_flash_attn
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.fuse_swiglu = fuse_swiglu
        self.fuse_rms_norm = fuse_rms_norm
        self.fuse_ln = fuse_ln
        self.use_rmsnorm = use_rmsnorm
        self.micro_batch_size = micro_batch_size

        self.max_sequence_length = max_sequence_length
        self.use_bias = use_bias
        self.weight_share_add_bias = weight_share_add_bias
        self.rope_theta = rope_theta
        self.fuse_rope = fuse_rope
        self.fuse_softmax_mask = fuse_softmax_mask
        self.use_fast_ln = use_fast_ln

        self.fuse_linear = fuse_linear
        self.ignored_index = ignored_index
        self.add_tail_layers = add_tail_layers
        self.use_recompute_lm_head = use_recompute_lm_head
        self.use_recompute_loss_fn = use_recompute_loss_fn

        # A fresh dict per instance: a `dict()` default in the signature would
        # be shared (and mutated) across every config created without it.
        self.refined_recompute = (
            {} if refined_recompute is None else refined_recompute
        )
        self.skip_recompute_ops = dict()
        # `refined_recompute` is a dictionary that specifies fine-grained gradient
        # recomputation settings, which currently only takes effect in Pipeline
        # Parallel (PP) mode.
        #
        # In PP mode, this dictionary populates `self.skip_recompute_ops` with the
        # following structure:
        # - Key (`op_name`): The operation name to configure, with possible values:
        #     * "mlp_row_ln" - MLP row-wise layer normalization
        #     * "flash_attn" - Flash attention operation
        #     * "attention_row_ln" - Attention row-wise layer normalization
        #     * "attention_column_ln" - Attention column-wise layer normalization
        #     * "mlp_column_ln" - MLP column-wise layer normalization
        #
        # - Value (`skip_num`): Controls how many times to skip recomputation:
        #     * 0: Never skip recomputation (minimum memory usage)
        #     * -1: Always skip recomputation (maximum memory usage)
        #     * [0,1,...,12]: Skip recomputation for specified number of times
        #     * >=12: Equivalent to -1 (always skip recomputation)
        #
        # This allows precise control over memory/computation tradeoffs for
        # different operations.
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.hidden_dropout_prob = hidden_dropout_prob
        self.compression_ratio = compression_ratio
        self.num_key_value_heads = num_key_value_heads
        self.use_sparse_head_and_loss_fn = use_sparse_head_and_loss_fn
        self.use_ep_comm_overlap = use_ep_comm_overlap
        self.use_fused_head_and_loss_fn = use_fused_head_and_loss_fn
        self.token_balance_loss = token_balance_loss
        self.token_balance_seqlen = token_balance_seqlen
        self.cachekv_quant = cachekv_quant
        self.pp_seg_method = pp_seg_method

    def get(self, key, default=None):
        """Return attribute `key`, or `default` when it is not set."""
        return getattr(self, key, default)


class Ernie4_5_MoEConfig(Ernie4_5_Config):
    r"""
    Configuration class for the ErnieMoE model architecture.

    Inherits every attribute from `Ernie4_5_Config` and adds the
    Mixture-of-Experts (MoE) specific settings.
    """

    model_type = "ernie"
    attribute_map = {
        "n_positions": "max_position_embeddings",
        "n_embd": "hidden_size",
        "n_layer": "num_hidden_layers",
        "n_head": "num_attention_heads",
        "n_inner": "intermediate_size",
        "activation_function": "hidden_act",
    }
    pretrained_init_configuration = ERNIE_PRETRAINED_INIT_CONFIGURATION
    base_model_tp_plan = {}

    def __init__(
        self,
        moe_num_experts: Union[int, list] = 0,
        use_recompute_moe=False,
        moe_capacity=(),
        moe_layer_interval=2,
        moe_layer_start_index=0,
        moe_layer_end_index=-1,
        moe_aux_loss_lambda=1e-2,
        moe_z_loss_lambda=1e-4,
        moe_orthogonal_loss_lambda=1e-2,
        sinkhorn_2gate=True,
        sinkhorn_temp=3e-2,
        global_aux_loss=False,
        moe_dropout_prob=0.0,
        moe_group="world",
        moe_gate="top2",
        moe_intermediate_size: Union[int, list] = 0,
        moe_num_shared_experts: int = 0,
        moe_reverse_token_drop: bool = False,
        moe_gate_act: str = "softmax",
        moe_norm_gate_logits=True,
        moe_all_to_all_dropout: float = 0.0,
        moe_k=2,
        moe_use_aux_free: bool = False,
        # `moe_group_experts` must be used with `moe_use_hard_gate=True`
        moe_group_experts: bool = False,
        moe_group_orthogonal_loss: bool = True,
        enable_delay_scale_loss: bool = True,
        num_acc_steps: int = 1,
        fuse_gate_detach_matmul: bool = False,
        dpo_config=None,
        moe_multimodal_dispatch_use_allgather: str = "",
        moe_use_hard_gate=False,
        moe_dense_experts_token_type_id=3,
        **kwargs,
    ):
        """
        Initialize the ErnieMoE configuration with MoE-specific parameters.

        Args:
            moe_num_experts: Number of experts in MoE layers (int, or one entry per expert group)
            use_recompute_moe: Whether to use recomputation for MoE layers
            moe_capacity: Capacity configuration for MoE layers
            moe_layer_interval: Interval between MoE layers
            moe_layer_start_index: Starting layer index for MoE
            moe_layer_end_index: Ending layer index for MoE (-1 means last layer)
            moe_aux_loss_lambda: Weight for auxiliary loss
            moe_z_loss_lambda: Weight for z-loss
            moe_orthogonal_loss_lambda: Weight for orthogonal loss
            sinkhorn_2gate: Whether to use sinkhorn 2-gate routing
            sinkhorn_temp: Temperature for sinkhorn routing
            global_aux_loss: Whether to use global auxiliary loss
            moe_dropout_prob: Dropout probability for MoE layers
            moe_group: Group configuration for MoE experts
            moe_gate: Type of gating mechanism ('top2', etc.)
            moe_intermediate_size: Intermediate size for MoE layers
            moe_num_shared_experts: Number of shared experts
            moe_reverse_token_drop: Whether to use reverse token dropping
            moe_gate_act: Activation function for gating
            moe_norm_gate_logits: Whether to normalize gate logits
            moe_all_to_all_dropout: Dropout for all-to-all communication
            moe_k: Number of experts to route to
            moe_use_aux_free: Whether to use auxiliary-free routing
            moe_group_experts: Whether to group experts (requires hard gating)
            moe_group_orthogonal_loss: Whether to use group orthogonal loss
            enable_delay_scale_loss: Whether to enable delayed loss scaling
            num_acc_steps: Number of accumulation steps
            fuse_gate_detach_matmul: Whether to fuse gate detach matmul
            dpo_config: Stored as given; not interpreted by this class
            moe_multimodal_dispatch_use_allgather: Stored as given; not interpreted by this class
            moe_use_hard_gate: Whether to use hard gating
            moe_dense_experts_token_type_id: Stored as given; not interpreted by this class
            **kwargs: Additional base model configuration parameters

        Note:
            When `use_recompute_moe` is True and the caller requested
            `recompute=True` with `recompute_granularity="full"`, the
            granularity is changed to "full_attn".
        """

        if use_recompute_moe:
            # This module defines no module-level logger, so resolve one here.
            import logging

            logging.getLogger(__name__).warning(
                "set `use_recompute_moe`=True, disabling `recompute_granularity=full`, change to full_attn."
            )
            # Both keys are optional; indexing kwargs directly raised KeyError
            # whenever the caller relied on the base-class defaults.
            if (
                kwargs.get("recompute")
                and kwargs.get("recompute_granularity") == "full"
            ):
                kwargs["recompute_granularity"] = "full_attn"
        super().__init__(**kwargs)

        self.moe_num_experts = moe_num_experts
        self.use_recompute_moe = use_recompute_moe
        self.moe_capacity = moe_capacity
        self.moe_aux_loss_lambda = moe_aux_loss_lambda
        self.moe_z_loss_lambda = moe_z_loss_lambda
        self.moe_orthogonal_loss_lambda = moe_orthogonal_loss_lambda
        self.global_aux_loss = global_aux_loss
        self.sinkhorn_2gate = sinkhorn_2gate
        self.sinkhorn_temp = sinkhorn_temp
        self.moe_layer_interval = moe_layer_interval
        self.moe_dropout_prob = moe_dropout_prob
        self.moe_group = moe_group
        self.moe_gate = moe_gate
        self.moe_intermediate_size = moe_intermediate_size
        self.moe_num_shared_experts = moe_num_shared_experts
        self.moe_reverse_token_drop = moe_reverse_token_drop
        self.moe_k = moe_k
        self.moe_all_to_all_dropout = moe_all_to_all_dropout
        self.moe_group_experts = moe_group_experts
        self.moe_group_orthogonal_loss = moe_group_orthogonal_loss
        self.enable_delay_scale_loss = enable_delay_scale_loss
        self.num_acc_steps = num_acc_steps
        self.moe_layer_start_index = moe_layer_start_index
        # -1 is a sentinel for "through the last hidden layer".
        self.moe_layer_end_index = (
            self.num_hidden_layers - 1
            if moe_layer_end_index == -1
            else moe_layer_end_index
        )
        self.moe_gate_act = moe_gate_act
        self.moe_norm_gate_logits = moe_norm_gate_logits
        self.moe_use_aux_free = moe_use_aux_free
        self.fuse_gate_detach_matmul = fuse_gate_detach_matmul
        self.dpo_config = dpo_config
        self.moe_multimodal_dispatch_use_allgather = (
            moe_multimodal_dispatch_use_allgather
        )
        self.moe_use_hard_gate = moe_use_hard_gate
        self.moe_dense_experts_token_type_id = moe_dense_experts_token_type_id

    @property
    def multimodel_experts(self) -> bool:
        """Whether `moe_num_experts` is a list/tuple with more than one entry."""
        return (
            isinstance(self.moe_num_experts, (tuple, list))
            and len(self.moe_num_experts) > 1
        )

    @property
    def use_moe(self) -> bool:
        """
        Check if the model is using the MoE architecture.

        Returns:
            bool: True if the expert count is positive. When `moe_num_experts`
            is a list/tuple the entries are summed first, so comparing a
            sequence with an int never raises TypeError.
        """
        experts = self.moe_num_experts
        if isinstance(experts, (tuple, list)):
            return sum(experts) > 0
        return experts > 0


class Ernie4_5_VLMoEConfig(Ernie4_5_MoEConfig):
    """
    Configuration class for the ERNIE 4.5 vision-language MoE model.

    Extends `Ernie4_5_MoEConfig` with a nested vision configuration and the
    multimodal settings accepted by `__init__`. Configuration objects inherit
    from [`PretrainedConfig`]; read the documentation from [`PretrainedConfig`]
    for more information.

    Args (inherited from the base classes, with the defaults they declare):
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the Ernie model.
        hidden_size (`int`, *optional*, defaults to 768):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 2):
            Number of hidden layers.
        num_attention_heads (`int`, *optional*, defaults to 2):
            Number of attention heads for each attention layer.
        initializer_range (`float`, *optional*, defaults to 0.02):
            Stored for compatibility.
        rms_norm_eps (`float`, *optional*, defaults to 1e-6):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether or not the model should return the last key/values attentions.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
    """

    model_type = "ernie4_5_moe_vl"
    attribute_map = {
        "n_positions": "max_position_embeddings",
        "n_embd": "hidden_size",
        "n_layer": "num_hidden_layers",
        "n_head": "num_attention_heads",
        "n_inner": "intermediate_size",
        "activation_function": "hidden_act",
    }
    base_model_tp_plan = {
        "ernie.layers.*.self_attn.qkv_proj": "colwise",
        "ernie.layers.*.self_attn.o_proj": "rowwise",
        "ernie.layers.*.mlp_text.experts.*.up_gate_proj": "colwise",
        "ernie.layers.*.mlp_text.experts.*.down_proj": "rowwise",
        "ernie.layers.*.mlp_text.gate": "colwise_rep",
        "ernie.layers.*.mlp.experts.*.up_gate_proj": "colwise",
        "ernie.layers.*.mlp.experts.*.down_proj": "rowwise",
        "ernie.layers.*.mlp.gate": "colwise_rep",
        "ernie.layers.*.mlp.up_gate_proj": "colwise",
        "ernie.layers.*.mlp.down_proj": "rowwise",
        "lm_head": "colwise_rep",
    }

    def __init__(
        self,
        vision_config=None,
        im_patch_id=None,
        pixel_hidden_size=None,
        modality_detach=False,
        temporal_conv_size=2,
        spatial_conv_size=2,
        mm_vocab_size=0,  # vocab for mm specialtokens
        max_text_id=None,
        use_temporal_conv=True,
        moe_use_size_all2all=False,
        moe_num_attn_experts=False,
        moe_dense_experts_token_type_id: int = 3,
        moe_use_hard_gate: bool = True,
        moe_fuse_experts: bool = False,
        moe_use_token_type_bias: bool = False,
        disable_ffn_model_parallel=False,
        fuse_attn_ffn=True,
        rope_3d=True,
        freq_allocation=20,
        using_precision_check=False,
        use_recompute_resampler=False,
        resampler_fuse_rms_norm=False,
        moe_layer_feed_fake_token=False,
        tensor_parallel_degree=1,
        **kwargs,
    ):
        """
        Initialize the vision-language MoE configuration.

        Args:
            vision_config: A dict of `DFNRopeVisionTransformerConfig` arguments,
                an existing `DFNRopeVisionTransformerConfig`, or `None` for the
                default vision configuration.
            **kwargs: Forwarded to `Ernie4_5_MoEConfig`.

        All other arguments are stored unchanged as attributes of the same name.
        """
        super().__init__(**kwargs)
        if isinstance(vision_config, dict):
            self.vision_config = DFNRopeVisionTransformerConfig(**vision_config)
        elif isinstance(vision_config, DFNRopeVisionTransformerConfig):
            # Keep a ready-made config instead of silently replacing it with
            # the defaults.
            self.vision_config = vision_config
        else:
            self.vision_config = DFNRopeVisionTransformerConfig()
        self.im_patch_id = im_patch_id
        self.pixel_hidden_size = pixel_hidden_size
        self.modality_detach = modality_detach
        self.temporal_conv_size = temporal_conv_size
        self.spatial_conv_size = spatial_conv_size
        self.mm_vocab_size = mm_vocab_size
        self.max_text_id = max_text_id
        self.use_temporal_conv = use_temporal_conv

        self.moe_use_size_all2all = moe_use_size_all2all
        self.moe_num_attn_experts = moe_num_attn_experts
        # These two are captured by this signature (not forwarded through
        # kwargs), so they overwrite the values the parent just assigned.
        self.moe_dense_experts_token_type_id = moe_dense_experts_token_type_id
        self.moe_use_hard_gate = moe_use_hard_gate
        self.moe_fuse_experts = moe_fuse_experts
        self.moe_use_token_type_bias = moe_use_token_type_bias
        self.disable_ffn_model_parallel = disable_ffn_model_parallel

        self.fuse_attn_ffn = fuse_attn_ffn
        self.rope_3d = rope_3d
        self.freq_allocation = freq_allocation
        self.using_precision_check = using_precision_check
        self.use_recompute_resampler = use_recompute_resampler
        self.resampler_fuse_rms_norm = resampler_fuse_rms_norm
        self.moe_layer_feed_fake_token = moe_layer_feed_fake_token

        self.tensor_parallel_degree = tensor_parallel_degree

    # `multimodel_experts` and `use_moe` are inherited from Ernie4_5_MoEConfig,
    # which handles both int and list/tuple values of `moe_num_experts`.

    def to_dict(self, saving_file=False):
        """
        Return a deep copy of the instance attributes as a plain dict.

        The nested vision configuration is expanded through its own
        `to_dict()`, and `model_type` is taken from the class. `saving_file`
        is accepted for interface compatibility and is not used.
        """
        output = copy.deepcopy(self.__dict__)
        if self.vision_config:
            output["vision_config"] = (
                self.vision_config.to_dict()
                if isinstance(self.vision_config, (DFNRopeVisionTransformerConfig))
                else self.vision_config
            )

        output["model_type"] = self.__class__.model_type
        return output
"""Image processor class for Ernie_45T_VL."""

import math
from typing import List, Optional, Union

import numpy as np
from PIL import Image

from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.image_transforms import (
    convert_to_rgb,
    normalize,
    rescale,
    resize,
    to_channel_dimension_format,
)
from transformers.image_utils import (
    OPENAI_CLIP_MEAN,
    OPENAI_CLIP_STD,
    ChannelDimension,
    ImageInput,
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_valid_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
)
from transformers.utils import TensorType, logging
from transformers.video_utils import VideoInput


logger = logging.get_logger(__name__)


def round_by_factor(number: int, factor: int) -> int:
    """Returns the closest integer to 'number' that is divisible by 'factor'."""
    return round(number / factor) * factor


def ceil_by_factor(number: int, factor: int) -> int:
    """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
    return math.ceil(number / factor) * factor


def floor_by_factor(number: int, factor: int) -> int:
    """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
    return math.floor(number / factor) * factor


def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 4 * 28 * 28,
    max_pixels: int = 16384 * 28 * 28,
):
    """
    Computes a target size for an image so that the following conditions are met:

    1. Both dimensions (height and width) are divisible by 'factor'.

    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].

    3. The aspect ratio of the image is maintained as closely as possible.

    Args:
        height (`int`): Original image height; must be positive.
        width (`int`): Original image width; must be positive.
        factor (`int`, *optional*, defaults to 28): Value both output dimensions must be divisible by.
        min_pixels (`int`, *optional*): Lower bound on `h_bar * w_bar`.
        max_pixels (`int`, *optional*): Upper bound on `h_bar * w_bar`.

    Returns:
        `Tuple[int, int]`: The target `(h_bar, w_bar)`.

    Raises:
        ValueError: If `height` or `width` is not positive, or if no size satisfying the pixel bounds
            could be produced.
    """
    if height <= 0 or width <= 0:
        # Guard the divisions below; the original code failed here with ZeroDivisionError.
        raise ValueError(
            f"height and width must be positive, got height: {height}, width: {width}"
        )

    MAX_RATIO = 200
    if max(height, width) / min(height, width) > MAX_RATIO:
        # Clamp extreme aspect ratios: keep the short side (rounded to `factor`) and cap the long side
        # at MAX_RATIO times the short side.
        if height > width:
            new_width = max(factor, round_by_factor(width, factor))
            new_height = floor_by_factor(new_width * MAX_RATIO, factor)
        else:
            new_height = max(factor, round_by_factor(height, factor))
            new_width = floor_by_factor(new_height * MAX_RATIO, factor)

        logger.info(
            "absolute aspect ratio must be smaller than %s, got %s, resize to %s",
            MAX_RATIO,
            max(height, width) / min(height, width),
            max(new_height, new_width) / min(new_height, new_width),
        )

        height = new_height
        width = new_width

    h_bar = max(factor, round_by_factor(height, factor))
    w_bar = max(factor, round_by_factor(width, factor))
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # Flooring can reach 0 for a very thin image; never go below one `factor` unit.
        h_bar = max(factor, floor_by_factor(height / beta, factor))
        w_bar = max(factor, floor_by_factor(width / beta, factor))
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = ceil_by_factor(height * beta, factor)
        w_bar = ceil_by_factor(width * beta, factor)

    if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
        raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")

    return h_bar, w_bar


def is_scaled_image(image: np.ndarray) -> bool:
    """
    Checks to see whether the pixel values have already been rescaled to [0, 1].
    """
    if image.dtype == np.uint8:
        return False

    # It's possible the image has pixel values in [0, 255] but is of floating type
    return np.min(image) >= 0 and np.max(image) <= 1


def make_batched_images(images) -> List[List[ImageInput]]:
    """
    Accepts images in list or nested list format, and makes a list of images for preprocessing.

    Args:
        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
            The input image.

    Returns:
        list: A list of images. A nested list is flattened; a flat list is returned as is; a single
        image is wrapped in a list.

    Raises:
        ValueError: If the input is empty or does not contain valid images.
    """
    if isinstance(images, (list, tuple)):
        # Empty containers fall through to the ValueError below instead of raising IndexError.
        if len(images) > 0:
            first = images[0]
            if isinstance(first, (list, tuple)):
                if len(first) > 0 and is_valid_image(first[0]):
                    return [img for img_list in images for img in img_list]
            elif is_valid_image(first):
                return images
    elif is_valid_image(images):
        return [images]

    raise ValueError(f"Could not make batched images from {images}")


# Adapted from transformers.models.llava_next_video.image_processing_llava_next_video.make_batched_videos
def make_batched_videos(videos) -> List[VideoInput]:
    """
    Normalizes the accepted video inputs into a list of videos.

    Args:
        videos: One of: a list of videos where each video is a list/tuple of frames; a list of PIL
            frames (treated as one video); a list of 4-D arrays (one array per video); or a single
            4-D array (one video).

    Returns:
        list: A list of videos.

    Raises:
        ValueError: If the input is empty or matches none of the accepted layouts.
    """
    if isinstance(videos, (list, tuple)):
        # Empty containers fall through to the ValueError below instead of raising IndexError.
        if len(videos) > 0:
            first = videos[0]
            if isinstance(first, (list, tuple)):
                if len(first) > 0 and is_valid_image(first[0]):
                    return videos
            elif is_valid_image(first):
                if isinstance(first, Image.Image):
                    return [videos]
                if len(first.shape) == 4:
                    return [list(video) for video in videos]
    elif is_valid_image(videos) and len(videos.shape) == 4:
        return [list(videos)]

    raise ValueError(f"Could not make batched video from {videos}")


class Ernie_45T_VLImageProcessor(BaseImageProcessor):
    r"""
    Constructs an adaptive image processor that dynamically resizes images based on the original images.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use when resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
        image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel
            in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `56 * 56`):
            The min pixels of the image to resize the image.
        max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`):
            The max pixels of the image to resize the image.
        patch_size (`int`, *optional*, defaults to 14):
            The spatial patch size of the vision encoder.
        temporal_conv_size (`int`, *optional*, defaults to 2):
            The temporal conv size in resampler.
        merge_size (`int`, *optional*, defaults to 2):
            The merge size of the vision encoder to llm encoder.
    """

    model_input_names = [
        "pixel_values",
        "image_grid_thw",
        "pixel_values_videos",
        "video_grid_thw",
    ]

    def __init__(
        self,
        do_resize: bool = True,
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        do_rescale: bool = True,
        rescale_factor: Union[int, float] = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = True,
        min_pixels: int = 56 * 56,
        max_pixels: int = 28 * 28 * 1280,
        patch_size: int = 14,
        temporal_conv_size: int = 2,
        merge_size: int = 2,
        **kwargs,
    ) -> None:
        """Stores the preprocessing configuration; see the class docstring for the arguments."""
        super().__init__(**kwargs)
        self.do_resize = do_resize
        self.resample = resample
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
        self.min_pixels = min_pixels
        self.max_pixels = max_pixels
        self.patch_size = patch_size
        self.temporal_conv_size = temporal_conv_size
        self.merge_size = merge_size
        # `size` mirrors the pixel bounds; `set_pixels` keeps both in sync.
        self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels}
        self.do_convert_rgb = do_convert_rgb

    def set_pixels(self, min_pixels=None, max_pixels=None, msg=""):
        """
        Updates the pixel bounds used when resizing.

        Args:
            min_pixels (`int`, *optional*): New lower bound; must be a non-negative int. `None` leaves
                the current value unchanged.
            max_pixels (`int`, *optional*): New upper bound; must be a positive int. `None` leaves the
                current value unchanged.
            msg (`str`, *optional*): Prefix added to the log messages.

        Raises:
            ValueError: If a provided bound has the wrong type or range. Both bounds are validated
                before either is applied, so a failed call changes nothing.
        """
        # Explicit raises instead of `assert`, which is stripped when Python runs with -O.
        if min_pixels is not None and not (
            isinstance(min_pixels, int) and min_pixels >= 0
        ):
            raise ValueError("min_pixels must be a non-negative int")
        if max_pixels is not None and not (
            isinstance(max_pixels, int) and max_pixels > 0
        ):
            raise ValueError("max_pixels must be positive int")

        if min_pixels is not None:
            logger.info(
                f"{msg} Ernie_45T_VLImageProcessor set min_pixels = {min_pixels}"
            )
            self.min_pixels = min_pixels
            self.size["min_pixels"] = int(min_pixels)
        if max_pixels is not None:
            logger.info(
                f"{msg} Ernie_45T_VLImageProcessor set max_pixels = {max_pixels}"
            )
            self.max_pixels = max_pixels
            self.size["max_pixels"] = int(max_pixels)

    def get_smarted_resize(self, height, width, min_pixels=None, max_pixels=None):
        """
        Computes the resize target and the patch grid for an image of the given size.

        Args:
            height (`int`): Original image height.
            width (`int`): Original image width.
            min_pixels (`int`, *optional*): Overrides `self.min_pixels` for this call.
            max_pixels (`int`, *optional*): Overrides `self.max_pixels` for this call.

        Returns:
            A pair `((resized_height, resized_width), (grid_h, grid_w))` where the grid is the resized
            size divided by `self.patch_size`.
        """
        actual_min_pixels = min_pixels if min_pixels is not None else self.min_pixels
        actual_max_pixels = max_pixels if max_pixels is not None else self.max_pixels
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=self.patch_size * self.merge_size,
            min_pixels=actual_min_pixels,
            max_pixels=actual_max_pixels,
        )
        return (resized_height, resized_width), (
            resized_height // self.patch_size,
            resized_width // self.patch_size,
        )

    def _preprocess(
        self,
        images: Union[ImageInput, VideoInput],
        do_resize: bool = True,
        resample: PILImageResampling = None,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = False,
        data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        predetermined_grid_thw=None,
    ):
        """
        Preprocess one image, or the frames of one video, into flattened patches.

        Args:
            images (`ImageInput` or `VideoInput`):
                Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255.
                If pixel values range from 0 to 1, set `do_rescale=False`. The size of the first image
                is used to compute the resize target for all of them.
            do_resize (`bool`, *optional*, defaults to `True`):
                Whether to resize the image.
            resample (`PILImageResampling`, *optional*):
                Resampling filter passed to `resize`.
            do_rescale (`bool`, *optional*, defaults to `True`):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to `1/255`):
                Scale factor to use if rescaling the image.
            do_normalize (`bool`, *optional*, defaults to `True`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Mean to use if normalizing the image.
                Can be a float or a list of floats corresponding to the number of channels in the image.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Standard deviation to use if normalizing the image.
                Can be a float or a list of floats corresponding to the number of channels in the image.
            do_convert_rgb (`bool`, *optional*, defaults to `False`):
                Whether to convert the image to RGB.
            data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, it is inferred from the first
                image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            predetermined_grid_thw (*optional*):
                One `(grid_h, grid_w)` pair per image, in units of `self.patch_size`. When given (and
                `do_resize` is set) each image is resized to that grid instead of calling `smart_resize`.

        Returns:
            `(flatten_patches, (grid_t, grid_h, grid_w))` where `flatten_patches` has shape
            `[grid_t * grid_h * grid_w, C * patch_size * patch_size]`.

        Raises:
            ValueError: If `predetermined_grid_thw` does not have one entry per image, or if the final
                image size is not divisible by `patch_size * merge_size`.
        """
        images = make_list_of_images(images)

        if do_convert_rgb:
            images = [convert_to_rgb(image) for image in images]

        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )
        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])

        if predetermined_grid_thw is not None and len(predetermined_grid_thw) != len(
            images
        ):
            raise ValueError(
                f"len(predetermined_grid_thw) {len(predetermined_grid_thw)} == len(images) {len(images)}"
            )

        if do_normalize:
            # The signature defaults are None; fall back to the configured statistics rather than
            # handing None to `normalize`.
            if image_mean is None:
                image_mean = self.image_mean
            if image_std is None:
                image_std = self.image_std

        height, width = get_image_size(images[0], channel_dim=input_data_format)
        resized_height, resized_width = height, width
        if do_resize and predetermined_grid_thw is None:
            # The target depends only on the first image's size, so compute it once for all frames.
            resized_height, resized_width = smart_resize(
                height,
                width,
                factor=self.patch_size * self.merge_size,
                min_pixels=self.min_pixels,
                max_pixels=self.max_pixels,
            )

        processed_images = []
        for img_idx, image in enumerate(images):
            if do_resize:
                if predetermined_grid_thw is not None:
                    grid_h_hint, grid_w_hint = predetermined_grid_thw[img_idx]
                    resized_height = grid_h_hint * self.patch_size
                    resized_width = grid_w_hint * self.patch_size

                image = resize(
                    image,
                    size=(resized_height, resized_width),
                    resample=resample,
                    data_format=input_data_format,
                )
            if do_rescale:
                image = rescale(
                    image, scale=rescale_factor, data_format=input_data_format
                )

            if do_normalize:
                image = normalize(
                    image=image,
                    mean=image_mean,
                    std=image_std,
                    data_format=input_data_format,
                )

            image = to_channel_dimension_format(
                image, data_format, input_channel_dim=input_data_format
            )  # [C, H, W]

            processed_images.append(image)

        merge_unit = self.patch_size * self.merge_size
        if resized_height % merge_unit != 0 or resized_width % merge_unit != 0:
            # The reshape below would fail with an opaque size-mismatch error; report the cause.
            raise ValueError(
                f"image size ({resized_height}, {resized_width}) must be divisible by "
                f"patch_size * merge_size = {merge_unit}"
            )

        patches = np.array(processed_images)
        if data_format == ChannelDimension.LAST:
            patches = patches.transpose([0, 3, 1, 2])

        channel = patches.shape[1]  # [time, C, H, W]
        grid_t = patches.shape[0]
        grid_h, grid_w = (
            resized_height // self.patch_size,
            resized_width // self.patch_size,
        )
        patches = patches.reshape(
            [
                grid_t,
                channel,
                grid_h // self.merge_size,
                self.merge_size,
                self.patch_size,
                grid_w // self.merge_size,
                self.merge_size,
                self.patch_size,
            ]
        )
        # [grid_t, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, psz, psz]
        patches = patches.transpose([0, 2, 5, 3, 6, 1, 4, 7])

        flatten_patches = patches.reshape(
            [grid_t * grid_h * grid_w, channel * self.patch_size * self.patch_size]
        )  # [grid_t * grid_h * grid_w, C * psz * psz]

        return flatten_patches, (grid_t, grid_h, grid_w)

    def preprocess(
        self,
        images: Optional[ImageInput] = None,
        videos: VideoInput = None,
        do_resize: bool = True,
        size: Optional[Union[int, List[int]]] = None,
        resample: PILImageResampling = None,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = False,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        predetermined_grid_thw=None,
    ):
        """
        Converts images and/or videos into flattened patches plus their `(t, h, w)` patch grids.

        NOTE(review): `do_resize`, `do_rescale`, `rescale_factor`, `do_normalize` and `do_convert_rgb`
        have concrete keyword defaults, so the instance-level settings are only used for them when the
        caller explicitly passes `None`. The defaults are kept as they are for backward compatibility.

        Args:
            images (`ImageInput`, *optional*):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            videos (`VideoInput`, *optional*):
                Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If
                passing in videos with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `True`):
                Whether to resize the image. `None` selects `self.do_resize`.
            size (*optional*):
                Accepted for API compatibility; it is not used by this method. The resize target is
                derived from `self.min_pixels` / `self.max_pixels`.
            resample (`int`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only
                has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `True`):
                Whether to rescale the image. `None` selects `self.do_rescale`.
            rescale_factor (`float`, *optional*, defaults to `1/255`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`. `None` selects
                `self.rescale_factor`.
            do_normalize (`bool`, *optional*, defaults to `True`):
                Whether to normalize the image. `None` selects `self.do_normalize`.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to
                `True`.
            do_convert_rgb (`bool`, *optional*, defaults to `False`):
                Whether to convert the image to RGB. `None` selects `self.do_convert_rgb`.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                - Unset: Return a list of `np.ndarray`.
                - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            predetermined_grid_thw (*optional*):
                Pre-computed `(grid_h, grid_w)` pairs in units of `self.patch_size`. For `images`, entry
                `i` is applied to image `i`. For `videos`, the whole sequence is forwarded unchanged to
                every video and must then contain one entry per frame.

        Returns:
            `BatchFeature` with `pixel_values` / `image_grid_thw` when `images` is given and
            `pixel_values_videos` / `video_grid_thw` when `videos` is given.

        Raises:
            ValueError: If the images or videos are empty or of an unsupported type.
        """
        do_resize = do_resize if do_resize is not None else self.do_resize
        resample = resample if resample is not None else self.resample
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = (
            rescale_factor if rescale_factor is not None else self.rescale_factor
        )
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        image_mean = image_mean if image_mean is not None else self.image_mean
        image_std = image_std if image_std is not None else self.image_std
        do_convert_rgb = (
            do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
        )

        # Options shared by every `_preprocess` call below.
        shared_kwargs = {
            "do_resize": do_resize,
            "resample": resample,
            "do_rescale": do_rescale,
            "rescale_factor": rescale_factor,
            "do_normalize": do_normalize,
            "image_mean": image_mean,
            "image_std": image_std,
            "data_format": data_format,
            "do_convert_rgb": do_convert_rgb,
            "input_data_format": input_data_format,
        }

        data = {}
        if images is not None:
            images = make_batched_images(images)
            if not valid_images(images):
                raise ValueError(
                    "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                    "torch.Tensor."
                )

            image_patches, image_grids = [], []
            for img_idx, image in enumerate(images):
                # `_preprocess` expects one grid entry per image it receives, hence the 1-item list.
                grid_hint = (
                    [predetermined_grid_thw[img_idx]]
                    if predetermined_grid_thw is not None
                    else None
                )
                patches, image_grid_thw = self._preprocess(
                    image, predetermined_grid_thw=grid_hint, **shared_kwargs
                )
                image_patches.append(patches)
                image_grids.append(image_grid_thw)

            data.update(
                {
                    # Concatenating the 2-D patch arrays avoids rebuilding them row by row.
                    "pixel_values": np.concatenate(image_patches, axis=0),
                    "image_grid_thw": np.array(image_grids),
                }
            )

        if videos is not None:
            videos = make_batched_videos(videos)

            video_patches, video_grids = [], []
            for frames in videos:
                patches, video_grid_thw = self._preprocess(
                    frames,
                    predetermined_grid_thw=predetermined_grid_thw,
                    **shared_kwargs,
                )
                video_patches.append(patches)
                video_grids.append(video_grid_thw)

            data.update(
                {
                    "pixel_values_videos": np.concatenate(video_patches, axis=0),
                    "video_grid_thw": np.array(video_grids),
                }
            )

        return BatchFeature(data=data, tensor_type=return_tensors)


__all__ = ["Ernie_45T_VLImageProcessor"]
0000000000000000000000000000000000000000..d8dfc96aaef3f925e7b60a519a79621c2205df0a --- /dev/null +++ b/model-00001-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:195b7f2a24c3e980c0f57fc6c417855f393e3ad5b17b903818010d24a493f8c5 +size 4949346096 diff --git a/model-00002-of-00172.safetensors b/model-00002-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d0be4efb4e9106cc3a3359ecc0681793c4a0a1a --- /dev/null +++ b/model-00002-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87142391dc9034fb047103d29d42a11030333d322236f736321c6c4a0d1eb388 +size 4932558584 diff --git a/model-00003-of-00172.safetensors b/model-00003-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e34a9d47cb7121ec683a700fe4d7f9c109c1ec4e --- /dev/null +++ b/model-00003-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90fbd25fee484b3854d76322c43a95fc627bb52a45df1df170f70b4ba73145f +size 4932511912 diff --git a/model-00004-of-00172.safetensors b/model-00004-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c432c8949ac434aa83b12d7a2b7beeb979ea496c --- /dev/null +++ b/model-00004-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f662741b81086d95c2a053c1f10d1a05da04c815fbd699a2b692b3a57ef6f78 +size 4974459808 diff --git a/model-00005-of-00172.safetensors b/model-00005-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da45842bd9bb8d364deca36644b3739ea894991c --- /dev/null +++ b/model-00005-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5d5330d10c1ee25574d962fee514bd901de5a087915a9f8a2248d0048059aa +size 4919971520 diff --git a/model-00006-of-00172.safetensors b/model-00006-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..0ceb838ca7c95f24fcac494a22d6dc6bc6bcffb6 --- /dev/null +++ b/model-00006-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73720c49e1e5f5627941be5d5505c5085fa7a8627eb70912928eb6b0e0cecb99 +size 4932511912 diff --git a/model-00007-of-00172.safetensors b/model-00007-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a9d2b369b653f9cd1256fd97ab90883378317a8 --- /dev/null +++ b/model-00007-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87e9f49af76b49179b5c494e93c547e6738cdc555b0ef3a852695649699b43d +size 4974455328 diff --git a/model-00008-of-00172.safetensors b/model-00008-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..081a1dc1173e13c9db975ec9363f2d394453ee14 --- /dev/null +++ b/model-00008-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4010d96c2d51a83330112df5d57b4581357c39f52f6dc73f8397fd5ed5b515ea +size 4869627776 diff --git a/model-00009-of-00172.safetensors b/model-00009-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3716e477d6352cb24bc7f6ad4c9ddb2e85e1b42a --- /dev/null +++ b/model-00009-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb1d87938bb45360a044de3ec71a93da5a2801d8e20e1c5311def08ea01f44c +size 4982860128 diff --git a/model-00010-of-00172.safetensors b/model-00010-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d25be44d09cdd3c575160c885007965327a7443f --- /dev/null +++ b/model-00010-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:915b36fb50dfe37bde8f50dc8819d92b38d38a7e65817534b1cd98efe476aa24 +size 4932511912 diff --git a/model-00011-of-00172.safetensors b/model-00011-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..269240ea7c0812c7d9d010dc5b9382aa69f1cbe4 --- /dev/null +++ b/model-00011-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a73b71aac77de13c1f20be5b7fc2c4904fa01a32e08b3956932e688d44e8e93 +size 4999630720 diff --git a/model-00012-of-00172.safetensors b/model-00012-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69992035d64e4655deaa6747e5680907ffae3ae1 --- /dev/null +++ b/model-00012-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ec41adf240703b832490bb959e028b14c42131877b6d750a79e304d45388cf +size 4894800568 diff --git a/model-00013-of-00172.safetensors b/model-00013-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff8e18a20bebf4112af75ebae9893c5f5693dd57 --- /dev/null +++ b/model-00013-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b036e1ea8b25c8af238782d5b26802f9fba69ea92091c11b07c66b7f44a2c497 +size 4932511912 diff --git a/model-00014-of-00172.safetensors b/model-00014-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaf7d7563b696e403284cb4d9fa70525fbf5708f --- /dev/null +++ b/model-00014-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83d5da7c18851b833093cb85d6a91cad274161804d35761def996508b0f886e +size 4999626248 diff --git a/model-00015-of-00172.safetensors b/model-00015-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52760189fad39992923d9ceab08f9c77a76fff59 --- /dev/null +++ b/model-00015-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80597422e2d4ad2262dec00360cf0455bf0a01803258f6cb99d4d715cf4576bf +size 4894805072 diff --git a/model-00016-of-00172.safetensors b/model-00016-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..9a481e6596275b8cc980e60b29d6813a9ad72ed4 --- /dev/null +++ b/model-00016-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d158cbcc097b1b4578cb9c69fc663d2fde9f103c21d14519d2754f50b3331f43 +size 4932511912 diff --git a/model-00017-of-00172.safetensors b/model-00017-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c70c64c3d8a03ffe726108285006a1921942f6ed --- /dev/null +++ b/model-00017-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888982133c4c9f539f9d9a7f4313c6f3ed6dfde6e189ce6f2c0d26024368a83f +size 4999621776 diff --git a/model-00018-of-00172.safetensors b/model-00018-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94386bafd6032c285d2530df6592aaa680307fe1 --- /dev/null +++ b/model-00018-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f639b043007cab7fdadf15e62547e1f6c95cce894843a1edad1d21b33e163c22 +size 4894809576 diff --git a/model-00019-of-00172.safetensors b/model-00019-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1424781a2fb4f442e0065e683de6539e776efab --- /dev/null +++ b/model-00019-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a104ec1568eec495232ac60e7fdc3bcfd6d81632801c1e1e51f5f45c186fa44 +size 4932511888 diff --git a/model-00020-of-00172.safetensors b/model-00020-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3ed8f0343175ab236c4b1717d06e3006fec7f3e --- /dev/null +++ b/model-00020-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3746ba6869b4eea8c9e30e53adc4362487f67d93226b9d9f39ede57ba60fb5c +size 4932511912 diff --git a/model-00021-of-00172.safetensors b/model-00021-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..3fd59ba81e583297c164b00e9ce06e8d2bec816f --- /dev/null +++ b/model-00021-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787eaa5cdbd029798b745166999cf2048d096595f704d1030ba98978234594a1 +size 4999631216 diff --git a/model-00022-of-00172.safetensors b/model-00022-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..622efd9e0314ff10a84bd19941c193749287e5ef --- /dev/null +++ b/model-00022-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c93956173c9694797fa7ac62bae35aab13fd379d317592fab914eda25707be0 +size 4894800064 diff --git a/model-00023-of-00172.safetensors b/model-00023-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcc5dc81fcd9e69855b281731fa42491fb0585bd --- /dev/null +++ b/model-00023-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca13fc86593d4cffb68c3ee43209b4c39fc38ed201e34bdf97ef0c491283dcc +size 4932511912 diff --git a/model-00024-of-00172.safetensors b/model-00024-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34660dfcc330b78215c77b55921d673b4b341418 --- /dev/null +++ b/model-00024-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd32315858172637b346143cfa1901819a00ec0946507394b44316d9c3a000a +size 4999626744 diff --git a/model-00025-of-00172.safetensors b/model-00025-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7c162c8449e3581b3d26d01da764adbd3072460 --- /dev/null +++ b/model-00025-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bffa1ff999d36521f9985298f78a6c1b0985ad1b721829a877bb1c087c4cd3 +size 4894804648 diff --git a/model-00026-of-00172.safetensors b/model-00026-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..9617b435e1b6a494e6ba22735959c815124d6793 --- /dev/null +++ b/model-00026-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d84e58b40c9e10bd5f3baf0c6e8b93eb672d2c24a6e4d82fb7702453c6e4733 +size 4932512000 diff --git a/model-00027-of-00172.safetensors b/model-00027-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfe65349a343da77fdaeb36b2855fd50e6de88d5 --- /dev/null +++ b/model-00027-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb754febe0841f00b1abce420f4862b8902da35a88ec28eb0321d7080d7bf41 +size 4999622368 diff --git a/model-00028-of-00172.safetensors b/model-00028-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..503347d139a5bc2bec174c25cda8adb93e694cd0 --- /dev/null +++ b/model-00028-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a3e6e47c4c1c523b7469771cb24908df0ab857b0f643c8ec3923427f80881e +size 4894809264 diff --git a/model-00029-of-00172.safetensors b/model-00029-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae8dda52c58178cc336d8e480260a8391d905466 --- /dev/null +++ b/model-00029-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a3d47648d7a6dca63fe95c40bdd711562915f18284f0a13dd1fb667d6ef6b0 +size 4932511976 diff --git a/model-00030-of-00172.safetensors b/model-00030-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87c3556e5cb01b40304abbda48bf97b4c423ed2b --- /dev/null +++ b/model-00030-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e31567a931c86581db37987aef75a6c1655edabe1e738ab351e426352bcacbf +size 4932512000 diff --git a/model-00031-of-00172.safetensors b/model-00031-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..a79c2809cb7191cb46284ef97e1f64880ea9ab52 --- /dev/null +++ b/model-00031-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936238bc8780b839dbff8eb90ff1dca4cbc3d5fd98bf77427b7399ef2982dda3 +size 4974465936 diff --git a/model-00032-of-00172.safetensors b/model-00032-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bd9fc941ce6a520439ba3849b98ee8aba08d0e3 --- /dev/null +++ b/model-00032-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e540b0d66e7a124ce8a104089b951c582660a0316a475cc82d6b227e33d3e71a +size 4919965616 diff --git a/model-00033-of-00172.safetensors b/model-00033-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5ac3dfdbfad63c451446d243083293d26127363 --- /dev/null +++ b/model-00033-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4a5d3ecfe0be283e611aa6b2726fb74d7c0394deceff47d7081d4d1e1653e6 +size 4932512000 diff --git a/model-00034-of-00172.safetensors b/model-00034-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..586eedacecbc006debcd58cfb7bdb08c84ceafac --- /dev/null +++ b/model-00034-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d86bdcc303528a072d87330bad8f9834c89e41a4903a84bbe83e9632356856 +size 4974461432 diff --git a/model-00035-of-00172.safetensors b/model-00035-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..331ab86610835fbf7428cc1da6ca08f14842aad2 --- /dev/null +++ b/model-00035-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f976f0ad4e1193747ac810a5a8e8540729cbb36be37d0c73862dfb6ce92e11 +size 4919970176 diff --git a/model-00036-of-00172.safetensors b/model-00036-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..8c6f531172aafef23f345b82bbea34bfd0d896d7 --- /dev/null +++ b/model-00036-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c9e0501aed6f63c2c79f9f6789d75b0d6ba24829bf307086c8c604b3d76fac +size 4932512000 diff --git a/model-00037-of-00172.safetensors b/model-00037-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..765ee7d0a8f2c8ba90dbee1e14fc4cf63f4710af --- /dev/null +++ b/model-00037-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36b39a75641d8d36c95093d71ad5cd3c0be990cfcaf3a386eb22a7bfc94ac2d +size 4974456920 diff --git a/model-00038-of-00172.safetensors b/model-00038-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea7a0c96254f3c32a1875c206117d95c1279bd9a --- /dev/null +++ b/model-00038-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6546cc35f7d0da23907399aaa77339a297776f35bc8e648d4b226bff9fc14f84 +size 4919974704 diff --git a/model-00039-of-00172.safetensors b/model-00039-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e0a00b494b2e73828a142f8eb284d54e4441af7 --- /dev/null +++ b/model-00039-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45800cfa96ccccfc98fcb2d49dd82c4c2a882d44bfe92fff72e7fe59b8f1729a +size 4932511976 diff --git a/model-00040-of-00172.safetensors b/model-00040-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f62493238129abf5a6296d9f13059b2532fbcff1 --- /dev/null +++ b/model-00040-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1101b8b092547fa28f62871839880d5778b30d2721c9b96977a4c7781dd6c80e +size 4932512000 diff --git a/model-00041-of-00172.safetensors b/model-00041-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..165d85e1f3452d22c628ad79721d82272b81bba7 --- /dev/null +++ b/model-00041-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb54537c40ff50c25427b47a3dc8498e6479d5121ce75fbf99c296205ffcc0d2 +size 4999632400 diff --git a/model-00042-of-00172.safetensors b/model-00042-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c43206f8fffce366cfb677f11550994bdcc0f3ca --- /dev/null +++ b/model-00042-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a36487c1d0903c3d25abfb350876b465f791c39138f02b1c9ce83e5da3e3cd1 +size 4894799168 diff --git a/model-00043-of-00172.safetensors b/model-00043-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d976ba457cef6c7da968e86072b7946d3d8df2c0 --- /dev/null +++ b/model-00043-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1c0638a308a0157f6742e641f4acf7cd6ce830d13e6a686a718f650b14c575 +size 4932512000 diff --git a/model-00044-of-00172.safetensors b/model-00044-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb5ceca394ceefafd6e0bbf91ce8c36abe17cda1 --- /dev/null +++ b/model-00044-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:403721551061c960ba27fab0c5bcb2926d6342561225b053a106dc68cf19cb7e +size 4999627880 diff --git a/model-00045-of-00172.safetensors b/model-00045-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15772861d30f89004322d7e09f9359f72091fb84 --- /dev/null +++ b/model-00045-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5d994f4d2c176c4510116ce57f965829956a7a236503317d34f9f011311c9f +size 4894803720 diff --git a/model-00046-of-00172.safetensors b/model-00046-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..4fe097bb7d1ffbebc5514e2cf161bbc2c014ad4b --- /dev/null +++ b/model-00046-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa0de7f4d784f16e85211f5cdc917331f0a4e23375efbb45e3fda229cdfc26f +size 4932512000 diff --git a/model-00047-of-00172.safetensors b/model-00047-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2bd9ad4eb8d836f850974c7982ee4b490c8bcff --- /dev/null +++ b/model-00047-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04575fdf90cfa359ff237e4902b346b8420c288068652705d7857df66f83e84e +size 4999623368 diff --git a/model-00048-of-00172.safetensors b/model-00048-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7243f6fe1d5c1db0936af389a45849324edc5cc --- /dev/null +++ b/model-00048-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253f8640eb3b22f3b939efde01155b44a5dcfd6a920a60a1814aa596af6024b0 +size 4894808256 diff --git a/model-00049-of-00172.safetensors b/model-00049-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..034f045dbf90f675407a832faaac438fcb92ab99 --- /dev/null +++ b/model-00049-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a258a3b1be958316ff0812b741ec229e9d161d655ed0489a2c01e1bac8c5672c +size 4932511976 diff --git a/model-00050-of-00172.safetensors b/model-00050-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad8d6dc7a4f24705e41332a4be6f8bb3e9de4548 --- /dev/null +++ b/model-00050-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c607c86091c86481f91e11df75ec87287b59f91ad72c2041a7ec032e2a87da1 +size 4932512000 diff --git a/model-00051-of-00172.safetensors b/model-00051-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..55dca60b3a6951afbddc808ae3c54405bfc16b46 --- /dev/null +++ b/model-00051-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c72e2dbe5685f977a8a9bbfbab987fd1647b72fad4eadb7776df85da5a157f +size 4999632904 diff --git a/model-00052-of-00172.safetensors b/model-00052-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86411ca452bf780b3537f8f865dd400f94bbe03f --- /dev/null +++ b/model-00052-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67566bb4d7a64e4058052ed422427bc1d4d882804c631fdc7e3f6784b29ada8a +size 4894798664 diff --git a/model-00053-of-00172.safetensors b/model-00053-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74e1cd5c55f7c197cef99fb161e8b55d3ff729f6 --- /dev/null +++ b/model-00053-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464ceef25b452fa380726b1dda5e45f314a2a895f5dce85c6e8bbf65faf35d09 +size 4932512000 diff --git a/model-00054-of-00172.safetensors b/model-00054-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2617012d8d77465a54799c929b04336add76cae1 --- /dev/null +++ b/model-00054-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488b437620add63244c1a172860bc2891f22ef44f5e01c47b37e3ecefe9feaaa +size 4999628384 diff --git a/model-00055-of-00172.safetensors b/model-00055-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbd7d1247e3e0d73a6c31f3b379922c3c091610f --- /dev/null +++ b/model-00055-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e6153a5113a2844a086610a91fef82de742dd0ac8c842ab0a1608c6f0521d3 +size 4894803224 diff --git a/model-00056-of-00172.safetensors b/model-00056-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..cdcd0a9e72e292944ea7dd9b964f5e8af0b96b50 --- /dev/null +++ b/model-00056-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd62365fb2f03834a21f0868b674ff45f75cd38a8102efaa0b02eb44782b5dd +size 4932512000 diff --git a/model-00057-of-00172.safetensors b/model-00057-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6130ae8c95641ee15302d5d80eb294cf9c383a99 --- /dev/null +++ b/model-00057-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9df55f1f8204e829b62b44dbcb8c030e76b2ef0d0f7ff6c21a89228b9b7b425e +size 4999623872 diff --git a/model-00058-of-00172.safetensors b/model-00058-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6512d520c2d52ad46aa57019e1bdd4159ce4def9 --- /dev/null +++ b/model-00058-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebe002bb2b0d80d9e5b05869172a46f38669b9fe8464150796e82b4849fd4ec +size 4894807752 diff --git a/model-00059-of-00172.safetensors b/model-00059-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f23c9ba7b679318e0452f436f70bbac3f34a4334 --- /dev/null +++ b/model-00059-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97340dd77837d8dc441b76fd74fcd21b1eeeaad2fbafd49d8f0371b9ac82ae00 +size 4932511984 diff --git a/model-00060-of-00172.safetensors b/model-00060-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f36f35306a8dfffa26ab76272c0083efd5027e6 --- /dev/null +++ b/model-00060-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db08bcb5ae1edc26bd3ba2cf57720c36b14883802705b72c6b023876577f52fd +size 4932512000 diff --git a/model-00061-of-00172.safetensors b/model-00061-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..9b7136b9f42a21996da12d810c5ed4a53269bd01 --- /dev/null +++ b/model-00061-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb27570e6b9063f7b69acb41722941c4a2c9b0318547e347aceaba166791eda +size 4974467464 diff --git a/model-00062-of-00172.safetensors b/model-00062-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96129b05bbb76720b1eaf0ab5ebec5e92ccc2c6e --- /dev/null +++ b/model-00062-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640ad4cd2e737b683b3a37910e7baf6a10b01f7472e36c88aed450239e4fd589 +size 4919964112 diff --git a/model-00063-of-00172.safetensors b/model-00063-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1296c6d910fcd1ccdd0c445e83f130976c94d15b --- /dev/null +++ b/model-00063-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded992ce146f15993605dbde4a1b2877ce6ff23f7f771324e07c8eeceea183c8 +size 4932512000 diff --git a/model-00064-of-00172.safetensors b/model-00064-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd0181d4e7729860b690419e8b58a8164b05b9b4 --- /dev/null +++ b/model-00064-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ac1e147f636fefeda3233f43f8e1244559450e3f1b8436c51a658f4f28e7f7 +size 4974462936 diff --git a/model-00065-of-00172.safetensors b/model-00065-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84b581c634edd5c34199b55dae1eec53ca01f6ee --- /dev/null +++ b/model-00065-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ce6c63782fc1dbdec58e8c038786214437473621a2cf246736a3afff1eb1da +size 4919968664 diff --git a/model-00066-of-00172.safetensors b/model-00066-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..d5b9b70dee71c82d8cb86937bcac9952995e436b --- /dev/null +++ b/model-00066-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f876f0288f11653b5cad9263f2d928a17e154b4aeb96d2cd149c1b7e332527fd +size 4932512000 diff --git a/model-00067-of-00172.safetensors b/model-00067-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7e47b8f032bbc716071de2ac8280fe531e80825 --- /dev/null +++ b/model-00067-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c73ac1596baf3e93d07a9e29aa081f6881059ba81f508c2f4968617f44f66d +size 4974458424 diff --git a/model-00068-of-00172.safetensors b/model-00068-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..759edee4b5ea6567d4a47be139b1bb809af4df3a --- /dev/null +++ b/model-00068-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3485843e292f51d48370af59f51e0b8b3e97a3de467f1091e3b9d4f6496f8bcd +size 4919973192 diff --git a/model-00069-of-00172.safetensors b/model-00069-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d034833515323ad88df77564f254bb428df22c13 --- /dev/null +++ b/model-00069-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c951945b9a6c7f4a9e5e38a503728c2d70b873efb922ec4d7f5e68188c5ac8 +size 4932511984 diff --git a/model-00070-of-00172.safetensors b/model-00070-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b754a7679bb69a3b3092407fe7363355b5cf293 --- /dev/null +++ b/model-00070-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5d421377b381dafccc35180e910a6cb4e3ff73ab0a6dec465eca9657f2f4b5 +size 4932512000 diff --git a/model-00071-of-00172.safetensors b/model-00071-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..4b99317db890394ead03e4382c5f949054f146bc --- /dev/null +++ b/model-00071-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c96644fdd2d663c9e8e6aa0445f66b600bb64ed850ef8e961e33262e27992d5 +size 4999633920 diff --git a/model-00072-of-00172.safetensors b/model-00072-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56eaf3a6e2db58b2c68f30f333384c128f4e9f36 --- /dev/null +++ b/model-00072-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a6998ae102c8ddce4de9c307c2f4cc16b17a7cf6c088d238eda0e2c944027b +size 4894797664 diff --git a/model-00073-of-00172.safetensors b/model-00073-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07f847470b517ea6610c662ed2166699a75f42a5 --- /dev/null +++ b/model-00073-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0649f51c84ecb01ce05e2a24f3b181ba9fb7185082473f4fb7d09506d0561684 +size 4932512000 diff --git a/model-00074-of-00172.safetensors b/model-00074-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3551d476700c55abecfe8191e41d488ff8af65d7 --- /dev/null +++ b/model-00074-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79015393867241c441afde7e3b1366743ac6a7109c6dc32cc6048daf81f1f902 +size 4999629384 diff --git a/model-00075-of-00172.safetensors b/model-00075-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fb1234feee3c2ed349646e21d8cc52489bcfce0 --- /dev/null +++ b/model-00075-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d967154d3ce2fb7b4d9cb80e4d25c5dae00142038a2ad4e9490c8cbb0a15f341 +size 4894802208 diff --git a/model-00076-of-00172.safetensors b/model-00076-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..6715412deffcfdaf99856f7901086cfb89d1c12a --- /dev/null +++ b/model-00076-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82487a7a5e21f04476a0ac71228fb53e887d6db8b9aabc62ba0b444b0c19b145 +size 4932512000 diff --git a/model-00077-of-00172.safetensors b/model-00077-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3eec3847f0396af7fa36bc736ef397a68f2fbe6 --- /dev/null +++ b/model-00077-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc348ce70e7428f9fc76a3a61aee88944fa9da16b25a211efebe0a076483fb4 +size 4999624872 diff --git a/model-00078-of-00172.safetensors b/model-00078-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..042e123cfe3e115477aa9068e86780cc03f29366 --- /dev/null +++ b/model-00078-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade72ae88607a2228ab77d6578a60d753c7af63714045b9a7205b953863eb517 +size 4894806744 diff --git a/model-00079-of-00172.safetensors b/model-00079-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eb30cc7b05966510359de5477424e97c6834848 --- /dev/null +++ b/model-00079-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42883478ef4af8dff766d4f142dc3ac24a0cb87020b1c5516d30010345cf380e +size 4932511992 diff --git a/model-00080-of-00172.safetensors b/model-00080-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6e93969c986391f7d1541d233496c60584ed6c8 --- /dev/null +++ b/model-00080-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5098224fa580f5e4647bed708a74f7b054e76a5555d1909e5e5e5778a2d3b0c +size 4932512000 diff --git a/model-00081-of-00172.safetensors b/model-00081-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..e9bd5476c91990cbf9edc37efd02ba2425b9eb84 --- /dev/null +++ b/model-00081-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57c2631fdb99d36f793290132154bebe5c1385c336c6fcf45fd107349432e02 +size 4999634432 diff --git a/model-00082-of-00172.safetensors b/model-00082-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41b2a0f8f90da4d1cd4f49076cef948c219cdeaf --- /dev/null +++ b/model-00082-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996894c505a1cf6a652be69a3dac24ed08e831bea8036c01ebc045d5f3a070f9 +size 4894797160 diff --git a/model-00083-of-00172.safetensors b/model-00083-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2597f654c00bddee221690c8f22ea9ab4c9f1da --- /dev/null +++ b/model-00083-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788bb6ba222c915c855b25088cd669a4bc2e33bd978f2261bc9fe4115ccb4dac +size 4932512000 diff --git a/model-00084-of-00172.safetensors b/model-00084-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32c2fd70bbf2d26c735ee40edaf3b10a755945fb --- /dev/null +++ b/model-00084-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37d350220d9ab0f27c7edbb47ec5b13a9d2631b0514ada7d00125ac1b9582bd +size 4999629880 diff --git a/model-00085-of-00172.safetensors b/model-00085-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4b0e3a694ffd5f084594a8b00337d2a0c5b462e --- /dev/null +++ b/model-00085-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f6e490fc7635eb12b4f6cdc287c2a21edb5d6a814bf7bbb02343608bf926b5 +size 4894801704 diff --git a/model-00086-of-00172.safetensors b/model-00086-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..581582d973aa5a859d4b27e3ca21fb9e6fde61d6 --- /dev/null +++ b/model-00086-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66177a3a07aff777b8bcdab5a9d433cc272636bd10be5384cd7986fa3a5ff3e +size 4932512000 diff --git a/model-00087-of-00172.safetensors b/model-00087-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b79b3241c944906ffffc58887854d93d51378ccc --- /dev/null +++ b/model-00087-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83629da6c918a1a5ceadb7f324fdd7caeacafd73d6bc998ff92dc3c11b0db82b +size 4999625376 diff --git a/model-00088-of-00172.safetensors b/model-00088-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22c1f1c426c27451fa26699fd86264341f447556 --- /dev/null +++ b/model-00088-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58e64537e1e0cffb4f14417af2953682cf92b88b677122a922c3e9912bd7b578 +size 4894806240 diff --git a/model-00089-of-00172.safetensors b/model-00089-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..751b9446ff9fc08fdb825290a9ca3eb3dddfeebd --- /dev/null +++ b/model-00089-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800d89920653c930fb701c95c4c8a0625515833ac8bd66658b7442ff472abdd7 +size 4932511992 diff --git a/model-00090-of-00172.safetensors b/model-00090-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b7c8a2eab140fa1f571a96bbdec0c565d51bb54 --- /dev/null +++ b/model-00090-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f176654ebe859116a26f815cd904d65146b426894eb31219905bcb2d809e07 +size 4932512000 diff --git a/model-00091-of-00172.safetensors b/model-00091-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..e6fc436a30cfbd6733159620edf1680a2f5f42ff --- /dev/null +++ b/model-00091-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6b19c2238ab71d03b25d534430142551ae20f568c2a2bb6edf3ec7381edda5 +size 4953514688 diff --git a/model-00092-of-00172.safetensors b/model-00092-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f69309ac865ea17d7f54ce1d9f8ceb30500552a1 --- /dev/null +++ b/model-00092-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5d1e8d0831eac4694cb9050445fd6d39bc160548a86db2fec21ebfe650f172 +size 4940916920 diff --git a/model-00093-of-00172.safetensors b/model-00093-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5376e0d2655db32e5217ca43dda80d659daf8648 --- /dev/null +++ b/model-00093-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac12e77b1438319e79113f2a358fdb5167322f034f4984e22ae9dc85a347e09 +size 4932512000 diff --git a/model-00094-of-00172.safetensors b/model-00094-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a39534737f91d30000fb88f35b886fb41e829133 --- /dev/null +++ b/model-00094-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7480da22556734ba808445aacf5771380f0f32bd36e83dbbf6b20952d47deb2b +size 4974464432 diff --git a/model-00095-of-00172.safetensors b/model-00095-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a025207e3ff5ba77e9b4d44ab1ef32f19065d12 --- /dev/null +++ b/model-00095-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808e7946ad687dc07348e0011a9fadaa09b6e620530447adb3f8869cce7a9150 +size 4919967144 diff --git a/model-00096-of-00172.safetensors b/model-00096-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..8521c2777c7e9405f977ce41ede28313b731cddc --- /dev/null +++ b/model-00096-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de128b6c47a206d292a9c1589dd1337bf606e9a4c1b76f1bbb174a20a2855a75 +size 4932512000 diff --git a/model-00097-of-00172.safetensors b/model-00097-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e691e85c46d17ca41a9c6cf6b78cd67be0d8d53 --- /dev/null +++ b/model-00097-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c862ae207665fb5b1e3e05a77b607218539c0765a46841774690362ebbafbfb +size 4974459928 diff --git a/model-00098-of-00172.safetensors b/model-00098-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b89e6dc3badecda796509fcd91d3f061ba0dd73 --- /dev/null +++ b/model-00098-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb6ee570786e3ba3667ea5e6e12adadff0eda7621ded84612d6a02498dfc3a9 +size 4919971680 diff --git a/model-00099-of-00172.safetensors b/model-00099-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61b69db388cc8e7102adc372fe255c4b4074b866 --- /dev/null +++ b/model-00099-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f1be5387d24ca02f2de0b0f8cc67810b185eefdf54bf4f834b35a8e14be727 +size 4932511992 diff --git a/model-00100-of-00172.safetensors b/model-00100-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68f22527adeb3e794f6665aeb93011cfc9d6dca3 --- /dev/null +++ b/model-00100-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a22825113bba1fbd4d577328073565def8a48d1eab691596de0105642a3f97 +size 4974455416 diff --git a/model-00101-of-00172.safetensors b/model-00101-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..299afd424eb0972dd4febabeb581acb0aacc0938 --- /dev/null +++ b/model-00101-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4348d2e9884011e737d8622ae3d1a742e7eab3cd9be367e09a498f6d207fb9fb +size 4869627968 diff --git a/model-00102-of-00172.safetensors b/model-00102-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7a82cc9700d1189a282968ba7d95ab835cd37ef --- /dev/null +++ b/model-00102-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1697ba5366d26676f62ff6fbbbb2a5c7e2f80e8cad8ba4690d033e582d6954ee +size 4982860216 diff --git a/model-00103-of-00172.safetensors b/model-00103-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e34b88c2e3cf44a2fc3aab5e153c19f6bc6cd2e --- /dev/null +++ b/model-00103-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63fce1549cbc8d7b79c7ad030a4a3dcca0d769c78e6281dac81dbce0d3d269b +size 4932512000 diff --git a/model-00104-of-00172.safetensors b/model-00104-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81af97febd2d90bd7daacd53284029f4ab403588 --- /dev/null +++ b/model-00104-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9355817f25231ebddcd1f53c646d21217afa80d9efa092a035a8faf48f604fa4 +size 4999630880 diff --git a/model-00105-of-00172.safetensors b/model-00105-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49785d5d0dc877d07105fc18e08869a5460b1c8 --- /dev/null +++ b/model-00105-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3055f0b0c8edd964b44ef251ba3d5485f1a79669ac016d015a931b68cf4fdcf +size 4894800688 diff --git a/model-00106-of-00172.safetensors b/model-00106-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..975d7411200db85ac07eb9f1bd04768033509975 --- /dev/null +++ b/model-00106-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecfa50ef4b477473813f53842d41f9d85c5c28d84a1cc74c1e060a4fa1d67aa +size 4932512000 diff --git a/model-00107-of-00172.safetensors b/model-00107-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1804d66b8c56e5016707fae04da12e20cd92e653 --- /dev/null +++ b/model-00107-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a576194c22e645689979c4d6ce4bfdd61f47a0a03343f7b9824c8c53265bfa2 +size 4999626376 diff --git a/model-00108-of-00172.safetensors b/model-00108-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1edf78ced8535249a2ad6740dda015335b3e3dcd --- /dev/null +++ b/model-00108-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28644a425dcd4ea0e6de2d4df5deb006a3da21ca2c06fa0f0c66d62e09a91184 +size 4894805232 diff --git a/model-00109-of-00172.safetensors b/model-00109-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f8f4bf59c28141216ea547f31afcb797ff01fbb --- /dev/null +++ b/model-00109-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33def5f9a40df00aa9844f33e4d331ab32e485770cfa5685500a5fffc9c72ada +size 4932512000 diff --git a/model-00110-of-00172.safetensors b/model-00110-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60e15da48760a197acef828643862ea7ef73da67 --- /dev/null +++ b/model-00110-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d8ca4979002663cb223fd16f94d80a960b511283676c1e5bc6fc180df0817f +size 4999621864 diff --git a/model-00111-of-00172.safetensors b/model-00111-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..94a74c3695e14c752759c4795cc64a653ea99f23 --- /dev/null +++ b/model-00111-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b12dd5d212e25af10d7fe890de3e301f531a707ce93acffa8ee25a375f72cb14 +size 4894809768 diff --git a/model-00112-of-00172.safetensors b/model-00112-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efde95af8cee1a0e66c7303413964a20b88d89e --- /dev/null +++ b/model-00112-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c6c9c9248f2c8a3abbe35c10801b7bc26ba0d104c6a286f1ff3919226585a6 +size 4932511968 diff --git a/model-00113-of-00172.safetensors b/model-00113-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ea6a7df0b5b6aa727af6c58112361d830bc9a5e --- /dev/null +++ b/model-00113-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a59c4cf69f2a701dc2b89dde33fc5060a8583ecee18cdf2b95d76a9347628bf +size 4932512000 diff --git a/model-00114-of-00172.safetensors b/model-00114-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11f76df5b35eeafcdda36e97a01b8d4cdb3e7fb7 --- /dev/null +++ b/model-00114-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ba328e4c12e482c46f52b3b2c37adb47b964095948ecd4ea3fb8ae04f2dfbc +size 4999631384 diff --git a/model-00115-of-00172.safetensors b/model-00115-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ee24963023b944697731a2c674e09bb1324fd5f --- /dev/null +++ b/model-00115-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ca5dfdc880c9b5a94127c79a57722a0dc3161d6666e386c47c71f14a9c0e53 +size 4894800176 diff --git a/model-00116-of-00172.safetensors b/model-00116-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..3654f6b38b5f6db10aff1ee3a9705ffaf37818d8 --- /dev/null +++ b/model-00116-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c987722060cfe7fe362ea75f21285dad8cbf526cffcac56865520e1c6b94338e +size 4932512000 diff --git a/model-00117-of-00172.safetensors b/model-00117-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f72b8a638b6ffc777d447eb8811047ffa6374f20 --- /dev/null +++ b/model-00117-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb6a5e0147f403f6e31d269f0efbfa4f2964c32a61305d5b7b9511e621e519f +size 4999626880 diff --git a/model-00118-of-00172.safetensors b/model-00118-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f39a4fe68a4c495da25eb7c2dd6cac54df87189 --- /dev/null +++ b/model-00118-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83978683175aec4301a166e2503511ebc6d63507904b75a5e41b92279a9e8409 +size 4894804728 diff --git a/model-00119-of-00172.safetensors b/model-00119-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68e057b53f7a1a10052483f9bc49f93a28f3f733 --- /dev/null +++ b/model-00119-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455b3e06a79383c48f448588d91c78c54bfed1822133d618c7c67e1adb81e6a1 +size 4932512000 diff --git a/model-00120-of-00172.safetensors b/model-00120-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5ce547ff205a262cffd9d07eded9f8d706e0f9d --- /dev/null +++ b/model-00120-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246e5003873042873dc0342bbb83ba890bcb82157220a806f16f3ded38d1ba45 +size 4999622368 diff --git a/model-00121-of-00172.safetensors b/model-00121-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..d0b97d9b8863a0cf0018f165c5b504a050a46dde --- /dev/null +++ b/model-00121-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df278c91934abe45a010378dedf3addf4f7eb5befe4d5527ca8f66d5a7801769 +size 4894809264 diff --git a/model-00122-of-00172.safetensors b/model-00122-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b410365d4b0119e719d1e42c20e4792a5e4853c --- /dev/null +++ b/model-00122-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb84589a2192503e0a8eb62a28b8103579c6a6282566d572739027ca233d9f3e +size 4932511976 diff --git a/model-00123-of-00172.safetensors b/model-00123-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b87f510133a84b61ef56df1cf0ead37d814df9f8 --- /dev/null +++ b/model-00123-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c89e014241c851a09b4113902a47804064cde7cad769a2e531d2b5855cd72cd +size 4932512000 diff --git a/model-00124-of-00172.safetensors b/model-00124-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1da3e9ae5c222e114ba81171417a3c9a59711078 --- /dev/null +++ b/model-00124-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25bb53279a4105a56bb37cf6514fdeb361f975c94aa6642ecc3ece4d16cff9bf +size 4974465936 diff --git a/model-00125-of-00172.safetensors b/model-00125-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c6f1150979a685bab0fa1bf08e2df6055ce4cf7 --- /dev/null +++ b/model-00125-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e841da1be2d7e7ca269b12a3b97633e9de77b39b7406e8fb0be82aec64701b +size 4919965616 diff --git a/model-00126-of-00172.safetensors b/model-00126-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..36ea435b830d7a766bb39e5e8b03f4bfae45f008 --- /dev/null +++ b/model-00126-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872f28cc53f5bbf6cdb1b99c3e441019b37f23023c832243b0de068d01a8c507 +size 4932512000 diff --git a/model-00127-of-00172.safetensors b/model-00127-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1126cf2a501477e1b79698fcc3c6b9b18bc87888 --- /dev/null +++ b/model-00127-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26475d7a2f82933ec95f751f56535b53a33abcf036d04ca27ec27068630d8fbc +size 4974461432 diff --git a/model-00128-of-00172.safetensors b/model-00128-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..715d0316c8089cdcb7e56395f6f95e19197a62a7 --- /dev/null +++ b/model-00128-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f94d99da4701771a7988bfc96430477be4d7b6dc17ac0f5f04ae33c76bc3f8 +size 4919970176 diff --git a/model-00129-of-00172.safetensors b/model-00129-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8da914ab0c5a14150b750f585e2674a8b7a497de --- /dev/null +++ b/model-00129-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ded39b82ad9620f7fecfb9d565e261f920af3f78f245d48d256ed8c1fd120c2 +size 4932512000 diff --git a/model-00130-of-00172.safetensors b/model-00130-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e74943726bfd7fc9671d272832372af48c5feceb --- /dev/null +++ b/model-00130-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7741471a1e617f5d36c7de16c9cc1c277b14acf76939abb2fe94abd8d67f0725 +size 4974456920 diff --git a/model-00131-of-00172.safetensors b/model-00131-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..9885ab1c85391f3e79cf6d66e53cf01f8af4f057 --- /dev/null +++ b/model-00131-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6dcf0acc5122e0fd6dedf1c09397e5e77af0c0413b26c66f5f09bf4baccce8 +size 4919974704 diff --git a/model-00132-of-00172.safetensors b/model-00132-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3e8c3c6ed2d1884781b1607f233cedaf4249699 --- /dev/null +++ b/model-00132-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e567096f7943a05f136cc598fa8f54688d3e39943666d44ac994cb877f3d94a9 +size 4932511976 diff --git a/model-00133-of-00172.safetensors b/model-00133-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f20fcb2e9089f4f76db00e83017214f1d757e0e --- /dev/null +++ b/model-00133-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08803d5d1ee176601cf2d34ea893389553cd70f9440a88619e0204a1e4afeb9 +size 4932512000 diff --git a/model-00134-of-00172.safetensors b/model-00134-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f269518b630283ab985ac508c3a5eea1723fd92 --- /dev/null +++ b/model-00134-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b712d07875843b7f7482c782bab8ec96c43cd813b51acc866cad32c2558665 +size 4999632400 diff --git a/model-00135-of-00172.safetensors b/model-00135-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95a9a1c74dcd00d51af412aeb06173f7fda9a1ef --- /dev/null +++ b/model-00135-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d940499b1e677a460a49d542fca677b080fa618d9c2e0ae58314dd45a21269 +size 4894799168 diff --git a/model-00136-of-00172.safetensors b/model-00136-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..2edb82e2bce75223c8cdec3cc49a4b88a3b28215 --- /dev/null +++ b/model-00136-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276284ffe0b1d2aa3cd8071ef816095a3ddeb479f5277274a0db3673ff96a240 +size 4932512000 diff --git a/model-00137-of-00172.safetensors b/model-00137-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..934ec13bdd31b78b6072a0224f9952f9e8941e88 --- /dev/null +++ b/model-00137-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e6fd709945f731707ce5ae9f43883883312a597086a412d390cba51732dbfb +size 4999627880 diff --git a/model-00138-of-00172.safetensors b/model-00138-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa644bab96389bf18f13cb2d46fe0117563f3d92 --- /dev/null +++ b/model-00138-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b1fc4c5998e466752dbc43868d001a52a450d684e53e966cd74c74c6e486d4 +size 4894803720 diff --git a/model-00139-of-00172.safetensors b/model-00139-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98113e627d7afb32577f7b66db034271480344c4 --- /dev/null +++ b/model-00139-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49240aaa953e25e65bfbff3c50f528103aacc77a248674d8d766b80372fae8c +size 4932512000 diff --git a/model-00140-of-00172.safetensors b/model-00140-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d371623aa05505e883270cd2ce6d67a021b31ef6 --- /dev/null +++ b/model-00140-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167707cd756b18f1db496e919fd7dda2fbeb34c1b2a5e099baaf5a3260e6856a +size 4999623368 diff --git a/model-00141-of-00172.safetensors b/model-00141-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..189050dfa86e531273c00351915ae1b30d870fd1 --- /dev/null +++ b/model-00141-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba2bf93d0d52f6fd6a13f6462993a305ba7e1795a5d4431ce74fc3bb9eaf13b +size 4894808256 diff --git a/model-00142-of-00172.safetensors b/model-00142-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da7f7a7b6b9699ec18a608ccac232607122887a2 --- /dev/null +++ b/model-00142-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4da8a0b02936cf150e9dd2de03dbe2585867bfb4ebb4cf6f557112ab6058f3f +size 4932511976 diff --git a/model-00143-of-00172.safetensors b/model-00143-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d81512d4a37a32bbfc75a163995579ffbc676c28 --- /dev/null +++ b/model-00143-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38acfb0e5577fdc9516400a8f102ef8c0dd15ba48361d9d6fe1f0a6537c1c416 +size 4932512000 diff --git a/model-00144-of-00172.safetensors b/model-00144-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d0d55515a9b679189613832d20b47b5ff21ce79 --- /dev/null +++ b/model-00144-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443422dce2e3712fdf505fe513c24292a48b07b1f8fb60826ae1870a40799872 +size 4999632904 diff --git a/model-00145-of-00172.safetensors b/model-00145-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c780a24d6072eab9c2b2826ef17bdf9d25cc18a2 --- /dev/null +++ b/model-00145-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba56bb004d53a247e77d5e82c9c61e0adb793a3d20386035e0536dee781c7f3 +size 4894798664 diff --git a/model-00146-of-00172.safetensors b/model-00146-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..71175f8e948c23cfc321eaed091db97cf87ed4b3 --- /dev/null +++ b/model-00146-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b56228d31433533b096531a43cf0348fd0c999346ff9cb70cc60b88816db486 +size 4932512000 diff --git a/model-00147-of-00172.safetensors b/model-00147-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb1378560ee7f47ff7b1027952f43e609557d627 --- /dev/null +++ b/model-00147-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92dfe06ed7f3dba21a583117f4bd56029fee299b0f68c1820727ff23fd9df045 +size 4999628384 diff --git a/model-00148-of-00172.safetensors b/model-00148-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d75f052c889e0b66707ac1923d1e5afc77f5026 --- /dev/null +++ b/model-00148-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff70e259a9ed18a71be830db0939b8d57ffee17d0a4fbb922cb97949da543a41 +size 4894803224 diff --git a/model-00149-of-00172.safetensors b/model-00149-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fecd2ddc251e88ebe5b995d903fd58277c2b52b6 --- /dev/null +++ b/model-00149-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3505aa9707232c08f87f39d4df69212e9ab638178f3f1d6f31e09d5dc9b61071 +size 4932512000 diff --git a/model-00150-of-00172.safetensors b/model-00150-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2637de120fbece57f50156e95d84658be7787f08 --- /dev/null +++ b/model-00150-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25890821bd87662e68d388a5ecdbee8f5310559c82e58c677fa23463ae6044e +size 4999623872 diff --git a/model-00151-of-00172.safetensors b/model-00151-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..5753525a28af926ee27ffab743d8e6c329041c70 --- /dev/null +++ b/model-00151-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6baf65516dbf9f927af254034a5c2854d7189fea6d3dbf77cd17e8bf0e53c37 +size 4894807752 diff --git a/model-00152-of-00172.safetensors b/model-00152-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e19bdea36603b0fc4794be7f0bfcff5067a43b4 --- /dev/null +++ b/model-00152-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010c00e80927044c6dd810cdfb697e58ff69b838f170b6c42e759e58f23c714d +size 4932511984 diff --git a/model-00153-of-00172.safetensors b/model-00153-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa88ba7ba409aa1166d126bddf5a5c40b10254e8 --- /dev/null +++ b/model-00153-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f937a8a85eb6b9574cb9150ec5884b91a6c41ac3924021f1dd3623074a18ce93 +size 4932512000 diff --git a/model-00154-of-00172.safetensors b/model-00154-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c1aa3003a45cb17a8a858fc58c550f672bc5f93 --- /dev/null +++ b/model-00154-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a535712b415276cfb45d4341353739afeb9930b3f3290cc42fa7a3116f2cc66c +size 4974467464 diff --git a/model-00155-of-00172.safetensors b/model-00155-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d11818c0f8fe71bf6d93f39e2e023e3fbbeb989f --- /dev/null +++ b/model-00155-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701b3afedb4eb515fefc186fc3075cddd7cb9eb6a170537ca0fac67127a0371d +size 4919964112 diff --git a/model-00156-of-00172.safetensors b/model-00156-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..61eeef88d70d795a52725c44d5793065e5e91ded --- /dev/null +++ b/model-00156-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d098a1426c179360fc02ebd9e7142a4d684425478c8b3b204617afed277650 +size 4932512000 diff --git a/model-00157-of-00172.safetensors b/model-00157-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68c781dfb96685ed660c4cd13248820e603aae01 --- /dev/null +++ b/model-00157-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccaf95bf3ca0048d2c55a1798f0ad634a2176c3c104f15d880d3c5a1b6df374a +size 4974462936 diff --git a/model-00158-of-00172.safetensors b/model-00158-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6c43ea72d6c76e546c8121e05bd40bbc0ecff1c --- /dev/null +++ b/model-00158-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2a487ec1ffac6420b49e22f4d94026a4a69f110b63234e4163b19128cd5cf9 +size 4919968664 diff --git a/model-00159-of-00172.safetensors b/model-00159-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1b516f31a3095ac2988b95bd745000fd80fd2d3 --- /dev/null +++ b/model-00159-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3851d5024259652baa7dd3bd1e96b9a3956a19eeb1af71e149829535cfaa0145 +size 4932512000 diff --git a/model-00160-of-00172.safetensors b/model-00160-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7927873f790e6946f859616bd37936a75a2098c --- /dev/null +++ b/model-00160-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb554439fd83011d131a42256870271ce5f175b6152e929c2e6aebb33d3fc6cb +size 4974458424 diff --git a/model-00161-of-00172.safetensors b/model-00161-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..a7ec03f9104f5ea4594f15362a60aea8a3ec0604 --- /dev/null +++ b/model-00161-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5883efd504b7081a1615fe3506951983cf6074bebb5cff17cfab0dbac049e090 +size 4919973192 diff --git a/model-00162-of-00172.safetensors b/model-00162-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d7d53592be7dea6f236406f0e7dfd7993bfe818 --- /dev/null +++ b/model-00162-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28105528af93ec20be913b9355ccefc555abd419d0c2c77ff0e797594427383 +size 4932511984 diff --git a/model-00163-of-00172.safetensors b/model-00163-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb44bacf53cf658acd1cfbf7ab77057976337450 --- /dev/null +++ b/model-00163-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753a789f15028b809013377013e9c3f311d4b63cf3bbbf40492ea7f622010853 +size 4932512000 diff --git a/model-00164-of-00172.safetensors b/model-00164-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21a4e9f25d65374e77903e6193a21e5cd94af8eb --- /dev/null +++ b/model-00164-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635d4041aeadf29d4df54923dd99920e9c2235f36e3f7903b23f93d42b3193b2 +size 4999633920 diff --git a/model-00165-of-00172.safetensors b/model-00165-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9eac0b60857c8d27f34d4c2aeaea116bb9a37a9 --- /dev/null +++ b/model-00165-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:697c8b7d3a192877011eca9f4fec4dee340009cbbcbbeb2a3a1426efeaa775a8 +size 4894797664 diff --git a/model-00166-of-00172.safetensors b/model-00166-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..0bc40157772cc971aaad44c526461b42adef4dbc --- /dev/null +++ b/model-00166-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2983d5ac9857f7c07210850f9569dbbb1a2785b448c56d0b258b8da0b8b9191 +size 4932512000 diff --git a/model-00167-of-00172.safetensors b/model-00167-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a60c144241c707cfb144a7eff8fa2bf75714bd87 --- /dev/null +++ b/model-00167-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e574d7a8868ac3932a31cdc4531e988031c079179b36f036293b865219fdf24 +size 4999629384 diff --git a/model-00168-of-00172.safetensors b/model-00168-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdfc67deb4df30cd2eea807758a2040c1740b11c --- /dev/null +++ b/model-00168-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2924b940517ada07dbbe61c89134930cfd080c9aee93dc05c685b76c40d216c5 +size 4894802208 diff --git a/model-00169-of-00172.safetensors b/model-00169-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be5ed2f228c3c66a96d7ba57519c1c8409c12480 --- /dev/null +++ b/model-00169-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2af02ad0ef4ac23497d23d2f5a9933fc47ec5405f91766ffa04b181f5a2602e +size 4932512000 diff --git a/model-00170-of-00172.safetensors b/model-00170-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8f7d5fef283660d919965bb57569838e8799bf0 --- /dev/null +++ b/model-00170-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d5db6d22f57bbdcabe8c45cf253b08443884b0e1c32e9dbba34db9433e6309 +size 4999624872 diff --git a/model-00171-of-00172.safetensors b/model-00171-of-00172.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..842a86e3bc3ad6d5f617b5312e46b544e2143274 --- /dev/null +++ b/model-00171-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95531518cd5dc7d85a6e352581d73d07983a7ce62435542f44302a3615aee63 +size 4125266032 diff --git a/model-00172-of-00172.safetensors b/model-00172-of-00172.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40aa968a1d9678ee4a91031c06a8082c38097740 --- /dev/null +++ b/model-00172-of-00172.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c1adca1c3ec6a5f3ab73f14bc76b49f0a8636a08adb457c8b3a633db88732c +size 2955408088 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..7e01d131b3eb5613ccf9ecf8bf17e8d9050cb00a --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,20482 @@ +{ + "metadata": { + "total_size": 847159538176 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00172.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00172.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00172.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00172.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.1.mlp.gate_proj.weight": 
"model-00001-of-00172.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00172.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00172.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00172.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00172.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": 
"model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.3.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": 
"model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00002-of-00172.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00003-of-00172.safetensors", + 
"model.layers.3.mlp.experts.17.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.18.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": 
"model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00003-of-00172.safetensors", + 
"model.layers.3.mlp.experts.32.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.33.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": 
"model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00003-of-00172.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00004-of-00172.safetensors", + 
"model.layers.3.mlp.experts.47.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.48.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": 
"model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00004-of-00172.safetensors", + 
"model.layers.3.mlp.experts.62.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.63.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.64.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.64.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.64.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.65.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.65.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.65.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.66.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.66.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.66.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.67.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.67.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.67.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.68.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.68.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.68.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.69.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.69.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.69.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.70.down_proj.weight": 
"model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.70.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.70.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.71.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.71.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.71.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.72.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.72.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.72.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.73.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.73.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.73.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.74.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.74.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.74.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.75.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.75.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.75.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.76.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.76.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.76.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.77.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.77.gate_proj.weight": "model-00004-of-00172.safetensors", + 
"model.layers.3.mlp.experts.77.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.78.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.78.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.78.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.79.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.79.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.79.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.80.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.80.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.80.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.81.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.81.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.81.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.82.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.82.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.82.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.83.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.83.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.83.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.84.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.84.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.84.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.85.down_proj.weight": 
"model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.85.gate_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.85.up_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.86.down_proj.weight": "model-00004-of-00172.safetensors", + "model.layers.3.mlp.experts.86.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.86.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.87.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.87.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.87.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.88.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.88.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.88.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.89.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.89.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.89.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.90.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.90.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.90.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.91.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.91.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.91.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.92.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.92.gate_proj.weight": "model-00005-of-00172.safetensors", + 
"model.layers.3.mlp.experts.92.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.93.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.93.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.93.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.94.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.94.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.94.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.95.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.95.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.95.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.96.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.96.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.96.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.97.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.97.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.97.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.98.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.98.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.98.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.99.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.99.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.99.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.100.down_proj.weight": 
"model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.100.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.100.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.101.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.101.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.101.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.102.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.102.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.102.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.103.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.103.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.103.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.104.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.104.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.104.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.105.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.105.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.105.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.106.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.106.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.106.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.107.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.107.gate_proj.weight": "model-00005-of-00172.safetensors", + 
"model.layers.3.mlp.experts.107.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.108.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.108.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.108.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.109.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.109.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.109.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.110.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.110.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.110.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.111.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.111.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.111.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.112.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.112.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.112.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.113.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.113.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.113.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.114.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.114.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.114.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.115.down_proj.weight": 
"model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.115.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.115.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.116.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.116.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.116.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.117.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.117.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.117.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.118.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.118.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.118.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.119.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.119.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.119.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.120.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.120.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.120.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.121.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.121.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.121.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.122.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.122.gate_proj.weight": "model-00005-of-00172.safetensors", + 
"model.layers.3.mlp.experts.122.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.123.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.123.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.123.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.124.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.124.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.124.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.125.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.125.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.125.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.126.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.126.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.126.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.127.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.127.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.experts.127.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.gate.weight": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.gate.weight_1": "model-00005-of-00172.safetensors", + "model.layers.3.mlp.moe_statics.e_score_correction_bias": "model-00005-of-00172.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00172.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00172.safetensors", + 
"model.layers.3.self_attn.v_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00005-of-00172.safetensors", + 
"model.layers.4.mlp.experts.7.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00005-of-00172.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00006-of-00172.safetensors", 
+ "model.layers.4.mlp.experts.14.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": 
"model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00006-of-00172.safetensors", + 
"model.layers.4.mlp.experts.29.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00006-of-00172.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": 
"model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00007-of-00172.safetensors", + 
"model.layers.4.mlp.experts.44.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": 
"model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00007-of-00172.safetensors", + 
"model.layers.4.mlp.experts.59.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.64.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.64.gate_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.64.up_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.65.down_proj.weight": "model-00007-of-00172.safetensors", + "model.layers.4.mlp.experts.65.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.65.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.66.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.66.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.66.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.67.down_proj.weight": 
"model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.67.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.67.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.68.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.68.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.68.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.69.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.69.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.69.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.70.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.70.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.70.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.71.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.71.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.71.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.72.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.72.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.72.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.73.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.73.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.73.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.74.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.74.gate_proj.weight": "model-00008-of-00172.safetensors", + 
"model.layers.4.mlp.experts.74.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.75.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.75.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.75.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.76.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.76.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.76.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.77.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.77.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.77.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.78.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.78.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.78.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.79.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.79.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.79.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.80.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.80.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.80.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.81.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.81.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.81.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.82.down_proj.weight": 
"model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.82.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.82.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.83.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.83.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.83.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.84.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.84.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.84.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.85.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.85.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.85.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.86.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.86.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.86.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.87.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.87.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.87.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.88.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.88.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.88.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.89.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.89.gate_proj.weight": "model-00008-of-00172.safetensors", + 
"model.layers.4.mlp.experts.89.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.90.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.90.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.90.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.91.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.91.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.91.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.92.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.92.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.92.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.93.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.93.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.93.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.94.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.94.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.94.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.95.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.95.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.95.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.96.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.96.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.96.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.97.down_proj.weight": 
"model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.97.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.97.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.98.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.98.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.98.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.99.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.99.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.99.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.100.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.100.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.100.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.101.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.101.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.101.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.102.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.102.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.102.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.103.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.103.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.103.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.104.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.104.gate_proj.weight": "model-00008-of-00172.safetensors", + 
"model.layers.4.mlp.experts.104.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.105.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.105.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.105.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.106.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.106.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.106.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.107.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.107.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.107.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.108.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.108.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.108.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.109.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.109.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.109.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.110.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.110.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.110.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.111.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.111.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.111.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.112.down_proj.weight": 
"model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.112.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.112.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.113.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.113.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.113.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.114.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.114.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.114.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.115.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.115.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.115.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.116.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.116.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.116.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.117.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.117.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.117.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.118.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.118.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.118.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.119.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.119.gate_proj.weight": "model-00008-of-00172.safetensors", + 
"model.layers.4.mlp.experts.119.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.120.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.120.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.120.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.121.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.121.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.121.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.122.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.122.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.122.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.123.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.123.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.123.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.124.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.124.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.124.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.125.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.125.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.125.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.126.down_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.126.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.126.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.127.down_proj.weight": 
"model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.127.gate_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.experts.127.up_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.gate.weight": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.gate.weight_1": "model-00008-of-00172.safetensors", + "model.layers.4.mlp.moe_statics.e_score_correction_bias": "model-00008-of-00172.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00172.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00008-of-00172.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.input_layernorm.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00009-of-00172.safetensors", + 
"model.layers.5.mlp.experts.4.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00009-of-00172.safetensors", + 
"model.layers.5.mlp.experts.11.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": 
"model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00009-of-00172.safetensors", + 
"model.layers.5.mlp.experts.26.up_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00009-of-00172.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": 
"model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00010-of-00172.safetensors", + 
"model.layers.5.mlp.experts.41.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": 
"model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00010-of-00172.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00011-of-00172.safetensors", + 
"model.layers.5.mlp.experts.56.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.64.down_proj.weight": 
"model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.64.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.64.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.65.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.65.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.65.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.66.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.66.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.66.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.67.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.67.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.67.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.68.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.68.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.68.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.69.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.69.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.69.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.70.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.70.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.70.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.71.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.71.gate_proj.weight": "model-00011-of-00172.safetensors", + 
"model.layers.5.mlp.experts.71.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.72.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.72.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.72.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.73.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.73.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.73.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.74.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.74.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.74.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.75.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.75.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.75.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.76.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.76.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.76.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.77.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.77.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.77.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.78.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.78.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.78.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.79.down_proj.weight": 
"model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.79.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.79.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.80.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.80.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.80.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.81.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.81.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.81.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.82.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.82.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.82.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.83.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.83.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.83.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.84.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.84.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.84.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.85.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.85.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.85.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.86.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.86.gate_proj.weight": "model-00011-of-00172.safetensors", + 
"model.layers.5.mlp.experts.86.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.87.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.87.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.87.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.88.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.88.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.88.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.89.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.89.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.89.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.90.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.90.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.90.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.91.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.91.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.91.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.92.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.92.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.92.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.93.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.93.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.93.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.94.down_proj.weight": 
"model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.94.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.94.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.95.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.95.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.95.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.96.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.96.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.96.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.97.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.97.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.97.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.98.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.98.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.98.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.99.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.99.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.99.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.100.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.100.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.100.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.101.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.101.gate_proj.weight": "model-00011-of-00172.safetensors", + 
"model.layers.5.mlp.experts.101.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.102.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.102.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.102.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.103.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.103.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.103.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.104.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.104.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.104.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.105.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.105.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.105.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.106.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.106.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.106.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.107.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.107.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.107.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.108.down_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.108.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.108.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.109.down_proj.weight": 
"model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.109.gate_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.109.up_proj.weight": "model-00011-of-00172.safetensors", + "model.layers.5.mlp.experts.110.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.110.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.110.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.111.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.111.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.111.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.112.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.112.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.112.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.113.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.113.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.113.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.114.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.114.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.114.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.115.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.115.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.115.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.116.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.116.gate_proj.weight": "model-00012-of-00172.safetensors", + 
"model.layers.5.mlp.experts.116.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.117.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.117.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.117.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.118.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.118.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.118.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.119.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.119.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.119.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.120.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.120.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.120.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.121.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.121.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.121.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.122.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.122.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.122.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.123.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.123.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.123.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.124.down_proj.weight": 
"model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.124.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.124.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.125.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.125.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.125.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.126.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.126.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.126.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.127.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.127.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.experts.127.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.gate.weight": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.gate.weight_1": "model-00012-of-00172.safetensors", + "model.layers.5.mlp.moe_statics.e_score_correction_bias": "model-00012-of-00172.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00012-of-00172.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.input_layernorm.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00012-of-00172.safetensors", + 
"model.layers.6.mlp.experts.1.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00012-of-00172.safetensors", + 
"model.layers.6.mlp.experts.8.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": 
"model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00012-of-00172.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00013-of-00172.safetensors", + 
"model.layers.6.mlp.experts.23.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": 
"model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00013-of-00172.safetensors", + 
"model.layers.6.mlp.experts.38.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": 
"model-00013-of-00172.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00014-of-00172.safetensors", + 
"model.layers.6.mlp.experts.53.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": 
"model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.64.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.64.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.64.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.65.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.65.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.65.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.66.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.66.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.66.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.67.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.67.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.67.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.68.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.68.gate_proj.weight": "model-00014-of-00172.safetensors", + 
"model.layers.6.mlp.experts.68.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.69.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.69.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.69.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.70.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.70.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.70.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.71.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.71.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.71.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.72.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.72.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.72.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.73.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.73.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.73.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.74.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.74.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.74.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.75.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.75.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.75.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.76.down_proj.weight": 
"model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.76.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.76.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.77.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.77.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.77.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.78.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.78.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.78.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.79.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.79.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.79.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.80.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.80.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.80.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.81.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.81.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.81.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.82.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.82.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.82.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.83.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.83.gate_proj.weight": "model-00014-of-00172.safetensors", + 
"model.layers.6.mlp.experts.83.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.84.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.84.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.84.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.85.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.85.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.85.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.86.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.86.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.86.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.87.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.87.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.87.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.88.down_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.88.gate_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.88.up_proj.weight": "model-00014-of-00172.safetensors", + "model.layers.6.mlp.experts.89.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.89.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.89.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.90.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.90.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.90.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.91.down_proj.weight": 
"model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.91.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.91.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.92.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.92.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.92.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.93.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.93.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.93.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.94.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.94.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.94.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.95.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.95.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.95.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.96.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.96.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.96.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.97.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.97.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.97.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.98.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.98.gate_proj.weight": "model-00015-of-00172.safetensors", + 
"model.layers.6.mlp.experts.98.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.99.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.99.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.99.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.100.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.100.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.100.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.101.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.101.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.101.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.102.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.102.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.102.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.103.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.103.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.103.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.104.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.104.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.104.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.105.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.105.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.105.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.106.down_proj.weight": 
"model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.106.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.106.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.107.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.107.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.107.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.108.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.108.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.108.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.109.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.109.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.109.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.110.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.110.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.110.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.111.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.111.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.111.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.112.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.112.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.112.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.113.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.113.gate_proj.weight": "model-00015-of-00172.safetensors", + 
"model.layers.6.mlp.experts.113.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.114.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.114.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.114.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.115.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.115.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.115.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.116.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.116.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.116.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.117.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.117.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.117.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.118.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.118.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.118.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.119.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.119.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.119.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.120.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.120.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.120.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.121.down_proj.weight": 
"model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.121.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.121.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.122.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.122.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.122.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.123.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.123.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.123.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.124.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.124.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.124.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.125.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.125.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.125.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.126.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.126.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.126.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.127.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.127.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.experts.127.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.gate.weight": "model-00015-of-00172.safetensors", + "model.layers.6.mlp.gate.weight_1": "model-00015-of-00172.safetensors", + 
"model.layers.6.mlp.moe_statics.e_score_correction_bias": "model-00015-of-00172.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00015-of-00172.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.input_layernorm.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00015-of-00172.safetensors", + 
"model.layers.7.mlp.experts.5.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00015-of-00172.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00016-of-00172.safetensors", + 
"model.layers.7.mlp.experts.13.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": 
"model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00016-of-00172.safetensors", + 
"model.layers.7.mlp.experts.28.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": 
"model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00016-of-00172.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00017-of-00172.safetensors", + 
"model.layers.7.mlp.experts.43.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": 
"model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00017-of-00172.safetensors", + 
"model.layers.7.mlp.experts.58.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.64.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.64.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.65.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.65.gate_proj.weight": 
"model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.65.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.66.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.66.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.66.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.67.down_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.67.gate_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.67.up_proj.weight": "model-00017-of-00172.safetensors", + "model.layers.7.mlp.experts.68.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.68.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.68.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.69.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.69.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.69.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.70.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.70.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.70.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.71.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.71.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.71.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.72.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.72.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.72.up_proj.weight": "model-00018-of-00172.safetensors", + 
"model.layers.7.mlp.experts.73.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.73.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.73.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.74.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.74.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.74.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.75.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.75.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.75.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.76.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.76.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.76.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.77.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.77.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.77.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.78.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.78.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.78.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.79.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.79.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.79.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.80.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.80.gate_proj.weight": 
"model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.80.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.81.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.81.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.81.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.82.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.82.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.82.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.83.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.83.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.83.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.84.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.84.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.84.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.85.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.85.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.85.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.86.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.86.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.86.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.87.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.87.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.87.up_proj.weight": "model-00018-of-00172.safetensors", + 
"model.layers.7.mlp.experts.88.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.88.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.88.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.89.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.89.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.89.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.90.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.90.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.90.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.91.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.91.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.91.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.92.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.92.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.92.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.93.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.93.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.93.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.94.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.94.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.94.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.95.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.95.gate_proj.weight": 
"model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.95.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.96.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.96.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.96.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.97.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.97.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.97.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.98.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.98.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.98.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.99.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.99.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.99.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.100.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.100.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.100.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.101.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.101.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.101.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.102.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.102.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.102.up_proj.weight": "model-00018-of-00172.safetensors", + 
"model.layers.7.mlp.experts.103.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.103.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.103.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.104.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.104.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.104.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.105.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.105.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.105.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.106.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.106.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.106.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.107.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.107.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.107.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.108.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.108.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.108.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.109.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.109.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.109.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.110.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.110.gate_proj.weight": 
"model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.110.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.111.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.111.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.111.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.112.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.112.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.112.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.113.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.113.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.113.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.114.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.114.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.114.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.115.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.115.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.115.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.116.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.116.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.116.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.117.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.117.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.117.up_proj.weight": "model-00018-of-00172.safetensors", + 
"model.layers.7.mlp.experts.118.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.118.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.118.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.119.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.119.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.119.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.120.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.120.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.120.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.121.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.121.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.121.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.122.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.122.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.122.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.123.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.123.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.123.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.124.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.124.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.124.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.125.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.125.gate_proj.weight": 
"model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.125.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.126.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.126.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.126.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.127.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.127.gate_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.experts.127.up_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.gate.weight": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.gate.weight_1": "model-00018-of-00172.safetensors", + "model.layers.7.mlp.moe_statics.e_score_correction_bias": "model-00018-of-00172.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00018-of-00172.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.8.input_layernorm.weight": "model-00018-of-00172.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00018-of-00172.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00019-of-00172.safetensors", + 
"model.layers.8.mlp.experts.2.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00019-of-00172.safetensors", + 
"model.layers.8.mlp.experts.10.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": 
"model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00019-of-00172.safetensors", + 
"model.layers.8.mlp.experts.25.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00019-of-00172.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": 
"model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00020-of-00172.safetensors", + 
"model.layers.8.mlp.experts.40.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": 
"model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00020-of-00172.safetensors", + 
"model.layers.8.mlp.experts.55.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00020-of-00172.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": 
"model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.64.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.64.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.64.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.65.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.65.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.65.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.66.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.66.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.66.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.67.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.67.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.67.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.68.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.68.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.68.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.69.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.69.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.69.up_proj.weight": "model-00021-of-00172.safetensors", + 
"model.layers.8.mlp.experts.70.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.70.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.70.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.71.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.71.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.71.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.72.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.72.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.72.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.73.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.73.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.73.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.74.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.74.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.74.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.75.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.75.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.75.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.76.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.76.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.76.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.77.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.77.gate_proj.weight": 
"model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.77.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.78.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.78.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.78.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.79.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.79.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.79.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.80.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.80.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.80.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.81.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.81.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.81.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.82.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.82.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.82.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.83.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.83.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.83.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.84.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.84.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.84.up_proj.weight": "model-00021-of-00172.safetensors", + 
"model.layers.8.mlp.experts.85.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.85.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.85.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.86.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.86.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.86.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.87.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.87.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.87.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.88.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.88.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.88.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.89.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.89.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.89.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.90.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.90.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.90.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.91.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.91.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.91.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.92.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.92.gate_proj.weight": 
"model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.92.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.93.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.93.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.93.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.94.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.94.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.94.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.95.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.95.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.95.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.96.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.96.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.96.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.97.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.97.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.97.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.98.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.98.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.98.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.99.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.99.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.99.up_proj.weight": "model-00021-of-00172.safetensors", + 
"model.layers.8.mlp.experts.100.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.100.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.100.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.101.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.101.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.101.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.102.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.102.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.102.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.103.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.103.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.103.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.104.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.104.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.104.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.105.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.105.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.105.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.106.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.106.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.106.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.107.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.107.gate_proj.weight": 
"model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.107.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.108.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.108.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.108.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.109.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.109.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.109.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.110.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.110.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.110.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.111.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.111.gate_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.111.up_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.112.down_proj.weight": "model-00021-of-00172.safetensors", + "model.layers.8.mlp.experts.112.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.112.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.113.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.113.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.113.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.114.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.114.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.114.up_proj.weight": "model-00022-of-00172.safetensors", + 
"model.layers.8.mlp.experts.115.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.115.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.115.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.116.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.116.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.116.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.117.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.117.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.117.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.118.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.118.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.118.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.119.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.119.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.119.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.120.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.120.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.120.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.121.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.121.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.121.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.122.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.122.gate_proj.weight": 
"model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.122.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.123.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.123.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.123.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.124.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.124.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.124.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.125.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.125.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.125.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.126.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.126.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.126.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.127.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.127.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.experts.127.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.gate.weight": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.gate.weight_1": "model-00022-of-00172.safetensors", + "model.layers.8.mlp.moe_statics.e_score_correction_bias": "model-00022-of-00172.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00022-of-00172.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.8.self_attn.k_proj.weight": 
"model-00022-of-00172.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.input_layernorm.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": 
"model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00022-of-00172.safetensors", + 
"model.layers.9.mlp.experts.14.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00022-of-00172.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": 
"model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00023-of-00172.safetensors", + 
"model.layers.9.mlp.experts.29.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": 
"model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00023-of-00172.safetensors", + 
"model.layers.9.mlp.experts.44.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00023-of-00172.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": 
"model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00024-of-00172.safetensors", + 
"model.layers.9.mlp.experts.59.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.64.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.64.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.64.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.65.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.65.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.65.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.66.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.66.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.66.up_proj.weight": 
"model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.67.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.67.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.67.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.68.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.68.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.68.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.69.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.69.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.69.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.70.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.70.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.70.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.71.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.71.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.71.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.72.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.72.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.72.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.73.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.73.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.73.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.74.down_proj.weight": "model-00024-of-00172.safetensors", + 
"model.layers.9.mlp.experts.74.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.74.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.75.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.75.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.75.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.76.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.76.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.76.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.77.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.77.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.77.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.78.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.78.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.78.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.79.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.79.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.79.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.80.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.80.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.80.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.81.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.81.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.81.up_proj.weight": 
"model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.82.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.82.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.82.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.83.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.83.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.83.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.84.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.84.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.84.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.85.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.85.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.85.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.86.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.86.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.86.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.87.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.87.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.87.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.88.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.88.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.88.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.89.down_proj.weight": "model-00024-of-00172.safetensors", + 
"model.layers.9.mlp.experts.89.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.89.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.90.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.90.gate_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.90.up_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.91.down_proj.weight": "model-00024-of-00172.safetensors", + "model.layers.9.mlp.experts.91.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.91.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.92.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.92.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.92.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.93.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.93.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.93.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.94.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.94.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.94.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.95.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.95.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.95.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.96.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.96.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.96.up_proj.weight": 
"model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.97.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.97.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.97.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.98.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.98.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.98.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.99.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.99.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.99.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.100.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.100.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.100.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.101.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.101.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.101.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.102.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.102.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.102.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.103.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.103.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.103.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.104.down_proj.weight": "model-00025-of-00172.safetensors", + 
"model.layers.9.mlp.experts.104.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.104.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.105.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.105.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.105.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.106.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.106.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.106.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.107.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.107.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.107.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.108.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.108.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.108.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.109.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.109.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.109.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.110.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.110.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.110.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.111.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.111.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.111.up_proj.weight": 
"model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.112.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.112.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.112.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.113.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.113.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.113.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.114.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.114.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.114.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.115.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.115.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.115.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.116.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.116.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.116.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.117.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.117.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.117.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.118.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.118.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.118.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.119.down_proj.weight": "model-00025-of-00172.safetensors", + 
"model.layers.9.mlp.experts.119.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.119.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.120.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.120.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.120.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.121.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.121.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.121.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.122.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.122.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.122.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.123.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.123.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.123.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.124.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.124.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.124.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.125.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.125.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.125.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.126.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.126.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.126.up_proj.weight": 
"model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.127.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.127.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.experts.127.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.gate.weight": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.gate.weight_1": "model-00025-of-00172.safetensors", + "model.layers.9.mlp.moe_statics.e_score_correction_bias": "model-00025-of-00172.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00025-of-00172.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.input_layernorm.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00025-of-00172.safetensors", + 
"model.layers.10.mlp.experts.3.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00025-of-00172.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": 
"model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00026-of-00172.safetensors", + 
"model.layers.10.mlp.experts.18.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": 
"model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00026-of-00172.safetensors", + 
"model.layers.10.mlp.experts.33.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00026-of-00172.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": 
"model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00027-of-00172.safetensors", + 
"model.layers.10.mlp.experts.48.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": 
"model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00027-of-00172.safetensors", + 
"model.layers.10.mlp.experts.63.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.64.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.64.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.64.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.65.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.65.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.65.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.66.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.66.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.66.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.67.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.67.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.67.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.68.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.68.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.68.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.69.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.69.gate_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.69.up_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.70.down_proj.weight": "model-00027-of-00172.safetensors", + "model.layers.10.mlp.experts.70.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.70.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.71.down_proj.weight": 
"model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.71.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.71.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.72.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.72.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.72.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.73.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.73.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.73.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.74.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.74.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.74.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.75.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.75.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.75.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.76.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.76.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.76.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.77.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.77.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.77.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.78.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.78.gate_proj.weight": "model-00028-of-00172.safetensors", + 
"model.layers.10.mlp.experts.78.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.79.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.79.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.79.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.80.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.80.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.80.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.81.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.81.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.81.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.82.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.82.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.82.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.83.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.83.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.83.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.84.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.84.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.84.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.85.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.85.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.85.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.86.down_proj.weight": 
"model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.86.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.86.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.87.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.87.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.87.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.88.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.88.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.88.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.89.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.89.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.89.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.90.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.90.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.90.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.91.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.91.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.91.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.92.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.92.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.92.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.93.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.93.gate_proj.weight": "model-00028-of-00172.safetensors", + 
"model.layers.10.mlp.experts.93.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.94.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.94.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.94.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.95.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.95.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.95.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.96.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.96.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.96.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.97.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.97.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.97.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.98.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.98.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.98.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.99.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.99.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.99.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.100.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.100.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.100.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.101.down_proj.weight": 
"model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.101.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.101.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.102.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.102.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.102.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.103.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.103.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.103.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.104.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.104.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.104.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.105.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.105.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.105.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.106.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.106.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.106.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.107.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.107.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.107.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.108.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.108.gate_proj.weight": 
"model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.108.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.109.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.109.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.109.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.110.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.110.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.110.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.111.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.111.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.111.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.112.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.112.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.112.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.113.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.113.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.113.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.114.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.114.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.114.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.115.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.115.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.115.up_proj.weight": "model-00028-of-00172.safetensors", 
+ "model.layers.10.mlp.experts.116.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.116.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.116.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.117.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.117.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.117.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.118.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.118.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.118.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.119.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.119.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.119.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.120.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.120.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.120.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.121.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.121.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.121.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.122.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.122.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.122.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.123.down_proj.weight": "model-00028-of-00172.safetensors", + 
"model.layers.10.mlp.experts.123.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.123.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.124.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.124.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.124.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.125.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.125.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.125.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.126.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.126.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.126.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.127.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.127.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.experts.127.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.gate.weight": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.gate.weight_1": "model-00028-of-00172.safetensors", + "model.layers.10.mlp.moe_statics.e_score_correction_bias": "model-00028-of-00172.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00028-of-00172.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.11.input_layernorm.weight": "model-00028-of-00172.safetensors", + 
"model.layers.11.mlp.experts.0.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00028-of-00172.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": 
"model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00029-of-00172.safetensors", + 
"model.layers.11.mlp.experts.15.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": 
"model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00029-of-00172.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00030-of-00172.safetensors", + 
"model.layers.11.mlp.experts.30.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": 
"model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00030-of-00172.safetensors", + 
"model.layers.11.mlp.experts.45.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": 
"model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00030-of-00172.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00031-of-00172.safetensors", + 
"model.layers.11.mlp.experts.60.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.64.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.64.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.64.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.65.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.65.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.66.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.66.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.67.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.67.gate_proj.weight": 
"model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.67.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.68.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.68.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.68.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.69.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.69.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.69.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.70.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.70.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.70.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.71.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.71.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.71.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.72.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.72.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.72.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.73.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.73.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.73.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.74.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.74.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.74.up_proj.weight": "model-00031-of-00172.safetensors", + 
"model.layers.11.mlp.experts.75.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.75.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.75.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.76.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.76.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.76.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.77.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.77.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.77.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.78.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.78.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.78.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.79.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.79.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.79.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.80.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.80.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.80.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.81.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.81.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.81.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.82.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.82.gate_proj.weight": 
"model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.82.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.83.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.83.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.83.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.84.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.84.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.84.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.85.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.85.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.85.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.86.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.86.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.86.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.87.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.87.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.87.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.88.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.88.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.88.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.89.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.89.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.89.up_proj.weight": "model-00031-of-00172.safetensors", + 
"model.layers.11.mlp.experts.90.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.90.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.90.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.91.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.91.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.91.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.92.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.92.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.92.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.93.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.93.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.93.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.94.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.94.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.94.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.95.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.95.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.95.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.96.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.96.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.96.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.97.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.97.gate_proj.weight": 
"model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.97.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.98.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.98.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.98.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.99.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.99.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.99.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.100.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.100.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.100.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.101.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.101.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.101.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.102.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.102.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.102.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.103.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.103.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.103.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.104.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.104.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.104.up_proj.weight": "model-00031-of-00172.safetensors", + 
"model.layers.11.mlp.experts.105.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.105.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.105.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.106.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.106.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.106.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.107.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.107.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.107.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.108.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.108.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.108.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.109.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.109.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.109.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.110.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.110.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.110.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.111.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.111.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.111.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.112.down_proj.weight": "model-00031-of-00172.safetensors", + 
"model.layers.11.mlp.experts.112.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.112.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.113.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.113.gate_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.113.up_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.114.down_proj.weight": "model-00031-of-00172.safetensors", + "model.layers.11.mlp.experts.114.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.114.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.115.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.115.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.115.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.116.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.116.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.116.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.117.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.117.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.117.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.118.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.118.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.118.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.119.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.119.gate_proj.weight": "model-00032-of-00172.safetensors", + 
"model.layers.11.mlp.experts.119.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.120.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.120.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.120.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.121.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.121.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.121.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.122.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.122.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.122.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.123.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.123.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.123.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.124.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.124.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.124.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.125.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.125.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.125.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.126.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.126.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.126.up_proj.weight": "model-00032-of-00172.safetensors", + 
"model.layers.11.mlp.experts.127.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.127.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.experts.127.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.gate.weight": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.gate.weight_1": "model-00032-of-00172.safetensors", + "model.layers.11.mlp.moe_statics.e_score_correction_bias": "model-00032-of-00172.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00032-of-00172.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.input_layernorm.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.0.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00032-of-00172.safetensors", + 
"model.layers.12.mlp.experts.3.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": 
"model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.15.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00032-of-00172.safetensors", + 
"model.layers.12.mlp.experts.18.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00032-of-00172.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.22.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": 
"model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00033-of-00172.safetensors", + 
"model.layers.12.mlp.experts.33.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.37.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": 
"model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.44.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00033-of-00172.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00034-of-00172.safetensors", + 
"model.layers.12.mlp.experts.48.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": 
"model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.59.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00034-of-00172.safetensors", + 
"model.layers.12.mlp.experts.63.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.64.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.64.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.64.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.65.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.65.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.65.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.66.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.66.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.66.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.67.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.67.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.67.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.68.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.68.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.68.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.69.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.69.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.69.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.70.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.70.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.70.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.71.down_proj.weight": 
"model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.71.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.71.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.72.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.72.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.72.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.73.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.73.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.73.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.74.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.74.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.74.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.75.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.75.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.75.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.76.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.76.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.76.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.77.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.77.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.77.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.78.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.78.gate_proj.weight": "model-00034-of-00172.safetensors", + 
"model.layers.12.mlp.experts.78.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.79.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.79.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.79.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.80.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.80.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.80.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.81.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.81.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.81.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.82.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.82.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.82.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.83.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.83.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.83.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.84.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.84.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.84.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.85.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.85.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.85.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.86.down_proj.weight": 
"model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.86.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.86.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.87.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.87.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.87.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.88.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.88.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.88.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.89.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.89.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.89.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.90.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.90.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.90.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.91.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.91.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.91.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.92.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.92.gate_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.92.up_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.93.down_proj.weight": "model-00034-of-00172.safetensors", + "model.layers.12.mlp.experts.93.gate_proj.weight": "model-00035-of-00172.safetensors", + 
"model.layers.12.mlp.experts.93.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.94.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.94.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.94.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.95.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.95.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.95.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.96.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.96.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.96.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.97.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.97.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.97.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.98.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.98.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.98.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.99.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.99.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.99.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.100.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.100.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.100.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.101.down_proj.weight": 
"model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.101.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.101.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.102.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.102.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.102.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.103.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.103.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.103.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.104.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.104.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.104.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.105.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.105.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.105.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.106.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.106.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.106.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.107.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.107.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.107.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.108.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.108.gate_proj.weight": 
"model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.108.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.109.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.109.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.109.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.110.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.110.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.110.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.111.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.111.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.111.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.112.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.112.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.112.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.113.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.113.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.113.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.114.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.114.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.114.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.115.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.115.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.115.up_proj.weight": "model-00035-of-00172.safetensors", 
+ "model.layers.12.mlp.experts.116.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.116.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.116.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.117.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.117.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.117.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.118.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.118.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.118.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.119.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.119.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.119.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.120.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.120.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.120.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.121.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.121.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.121.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.122.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.122.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.122.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.123.down_proj.weight": "model-00035-of-00172.safetensors", + 
"model.layers.12.mlp.experts.123.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.123.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.124.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.124.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.124.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.125.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.125.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.125.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.126.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.126.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.126.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.127.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.127.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.experts.127.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.gate.weight": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.gate.weight_1": "model-00035-of-00172.safetensors", + "model.layers.12.mlp.moe_statics.e_score_correction_bias": "model-00035-of-00172.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00035-of-00172.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.input_layernorm.weight": "model-00035-of-00172.safetensors", + 
"model.layers.13.mlp.experts.0.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.4.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": 
"model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": "model-00035-of-00172.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00036-of-00172.safetensors", + 
"model.layers.13.mlp.experts.15.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": 
"model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00036-of-00172.safetensors", + 
"model.layers.13.mlp.experts.30.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": 
"model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00036-of-00172.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00037-of-00172.safetensors", + 
"model.layers.13.mlp.experts.45.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": 
"model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00037-of-00172.safetensors", + 
"model.layers.13.mlp.experts.60.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.64.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.64.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.64.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.65.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.65.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.65.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.66.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.66.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.66.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.67.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.67.gate_proj.weight": 
"model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.67.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.68.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.68.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.68.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.69.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.69.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.69.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.70.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.70.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.70.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.71.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.71.gate_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.71.up_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.72.down_proj.weight": "model-00037-of-00172.safetensors", + "model.layers.13.mlp.experts.72.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.72.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.73.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.73.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.73.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.74.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.74.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.74.up_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.75.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.75.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.75.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.76.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.76.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.76.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.77.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.77.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.77.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.78.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.78.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.78.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.79.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.79.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.79.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.80.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.80.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.80.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.81.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.81.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.81.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.82.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.82.gate_proj.weight": 
"model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.82.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.83.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.83.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.83.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.84.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.84.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.84.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.85.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.85.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.85.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.86.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.86.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.86.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.87.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.87.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.87.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.88.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.88.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.88.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.89.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.89.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.89.up_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.90.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.90.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.90.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.91.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.91.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.91.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.92.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.92.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.92.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.93.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.93.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.93.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.94.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.94.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.94.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.95.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.95.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.95.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.96.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.96.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.96.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.97.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.97.gate_proj.weight": 
"model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.97.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.98.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.98.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.98.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.99.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.99.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.99.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.100.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.100.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.100.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.101.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.101.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.101.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.102.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.102.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.102.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.103.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.103.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.103.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.104.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.104.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.104.up_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.105.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.105.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.105.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.106.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.106.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.106.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.107.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.107.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.107.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.108.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.108.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.108.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.109.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.109.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.109.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.110.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.110.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.110.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.111.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.111.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.111.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.112.down_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.112.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.112.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.113.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.113.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.113.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.114.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.114.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.114.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.115.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.115.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.115.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.116.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.116.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.116.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.117.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.117.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.117.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.118.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.118.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.118.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.119.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.119.gate_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.119.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.120.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.120.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.120.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.121.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.121.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.121.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.122.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.122.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.122.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.123.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.123.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.123.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.124.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.124.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.124.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.125.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.125.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.125.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.126.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.126.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.126.up_proj.weight": "model-00038-of-00172.safetensors", + 
"model.layers.13.mlp.experts.127.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.127.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.experts.127.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.gate.weight": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.gate.weight_1": "model-00038-of-00172.safetensors", + "model.layers.13.mlp.moe_statics.e_score_correction_bias": "model-00038-of-00172.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00038-of-00172.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.input_layernorm.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00038-of-00172.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00039-of-00172.safetensors", + 
"model.layers.14.mlp.experts.3.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.8.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": 
"model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.15.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00039-of-00172.safetensors", + 
"model.layers.14.mlp.experts.18.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": 
"model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00039-of-00172.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.30.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00040-of-00172.safetensors", + 
"model.layers.14.mlp.experts.33.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.37.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": 
"model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00040-of-00172.safetensors", + 
"model.layers.14.mlp.experts.48.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.52.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": 
"model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00040-of-00172.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.59.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00041-of-00172.safetensors", + 
"model.layers.14.mlp.experts.63.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.64.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.64.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.64.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.65.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.65.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.65.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.66.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.66.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.66.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.67.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.67.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.67.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.68.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.68.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.68.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.69.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.69.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.69.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.70.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.70.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.70.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.71.down_proj.weight": 
"model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.71.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.71.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.72.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.72.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.72.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.73.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.73.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.73.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.74.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.74.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.74.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.75.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.75.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.75.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.76.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.76.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.76.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.77.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.77.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.77.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.78.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.78.gate_proj.weight": "model-00041-of-00172.safetensors", + 
"model.layers.14.mlp.experts.78.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.79.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.79.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.79.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.80.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.80.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.80.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.81.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.81.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.81.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.82.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.82.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.82.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.83.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.83.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.83.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.84.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.84.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.84.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.85.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.85.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.85.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.86.down_proj.weight": 
"model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.86.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.86.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.87.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.87.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.87.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.88.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.88.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.88.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.89.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.89.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.89.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.90.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.90.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.90.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.91.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.91.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.91.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.92.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.92.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.92.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.93.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.93.gate_proj.weight": "model-00041-of-00172.safetensors", + 
"model.layers.14.mlp.experts.93.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.94.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.94.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.94.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.95.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.95.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.95.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.96.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.96.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.96.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.97.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.97.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.97.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.98.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.98.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.98.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.99.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.99.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.99.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.100.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.100.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.100.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.101.down_proj.weight": 
"model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.101.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.101.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.102.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.102.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.102.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.103.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.103.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.103.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.104.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.104.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.104.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.105.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.105.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.105.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.106.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.106.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.106.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.107.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.107.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.107.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.108.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.108.gate_proj.weight": 
"model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.108.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.109.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.109.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.109.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.110.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.110.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.110.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.111.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.111.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.111.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.112.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.112.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.112.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.113.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.113.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.113.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.114.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.114.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.114.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.115.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.115.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.115.up_proj.weight": "model-00041-of-00172.safetensors", 
+ "model.layers.14.mlp.experts.116.down_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.116.gate_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.116.up_proj.weight": "model-00041-of-00172.safetensors", + "model.layers.14.mlp.experts.117.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.117.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.117.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.118.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.118.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.118.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.119.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.119.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.119.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.120.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.120.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.120.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.121.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.121.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.121.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.122.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.122.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.122.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.123.down_proj.weight": "model-00042-of-00172.safetensors", + 
"model.layers.14.mlp.experts.123.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.123.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.124.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.124.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.124.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.125.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.125.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.125.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.126.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.126.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.126.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.127.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.127.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.experts.127.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.gate.weight": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.gate.weight_1": "model-00042-of-00172.safetensors", + "model.layers.14.mlp.moe_statics.e_score_correction_bias": "model-00042-of-00172.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00042-of-00172.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.input_layernorm.weight": "model-00042-of-00172.safetensors", + 
"model.layers.15.mlp.experts.0.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.4.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": 
"model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00042-of-00172.safetensors", + 
"model.layers.15.mlp.experts.15.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.19.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00042-of-00172.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": 
"model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.26.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00043-of-00172.safetensors", + 
"model.layers.15.mlp.experts.30.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": 
"model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.41.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00043-of-00172.safetensors", + 
"model.layers.15.mlp.experts.45.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.48.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00043-of-00172.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": 
"model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00044-of-00172.safetensors", + 
"model.layers.15.mlp.experts.60.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.63.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.64.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.64.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.64.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.65.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.65.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.65.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.66.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.66.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.66.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.67.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.67.gate_proj.weight": 
"model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.67.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.68.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.68.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.69.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.69.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.69.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.70.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.70.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.70.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.71.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.71.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.71.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.72.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.72.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.72.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.73.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.73.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.73.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.74.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.74.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.74.up_proj.weight": "model-00044-of-00172.safetensors", + 
"model.layers.15.mlp.experts.75.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.75.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.75.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.76.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.76.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.76.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.77.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.77.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.77.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.78.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.78.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.78.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.79.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.79.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.79.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.80.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.80.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.80.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.81.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.81.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.81.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.82.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.82.gate_proj.weight": 
"model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.82.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.83.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.83.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.83.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.84.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.84.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.84.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.85.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.85.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.85.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.86.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.86.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.86.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.87.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.87.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.87.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.88.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.88.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.88.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.89.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.89.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.89.up_proj.weight": "model-00044-of-00172.safetensors", + 
"model.layers.15.mlp.experts.90.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.90.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.90.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.91.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.91.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.91.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.92.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.92.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.92.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.93.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.93.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.93.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.94.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.94.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.94.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.95.down_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.95.gate_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.95.up_proj.weight": "model-00044-of-00172.safetensors", + "model.layers.15.mlp.experts.96.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.96.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.96.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.97.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.97.gate_proj.weight": 
"model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.97.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.98.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.98.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.98.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.99.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.99.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.99.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.100.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.100.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.100.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.101.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.101.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.101.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.102.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.102.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.102.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.103.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.103.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.103.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.104.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.104.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.104.up_proj.weight": "model-00045-of-00172.safetensors", + 
"model.layers.15.mlp.experts.105.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.105.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.105.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.106.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.106.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.106.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.107.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.107.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.107.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.108.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.108.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.108.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.109.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.109.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.109.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.110.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.110.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.110.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.111.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.111.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.111.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.112.down_proj.weight": "model-00045-of-00172.safetensors", + 
"model.layers.15.mlp.experts.112.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.112.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.113.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.113.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.113.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.114.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.114.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.114.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.115.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.115.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.115.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.116.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.116.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.116.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.117.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.117.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.117.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.118.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.118.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.118.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.119.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.119.gate_proj.weight": "model-00045-of-00172.safetensors", + 
"model.layers.15.mlp.experts.119.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.120.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.120.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.120.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.121.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.121.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.121.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.122.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.122.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.122.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.123.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.123.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.123.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.124.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.124.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.124.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.125.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.125.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.125.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.126.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.126.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.126.up_proj.weight": "model-00045-of-00172.safetensors", + 
"model.layers.15.mlp.experts.127.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.127.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.experts.127.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.gate.weight": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.gate.weight_1": "model-00045-of-00172.safetensors", + "model.layers.15.mlp.moe_statics.e_score_correction_bias": "model-00045-of-00172.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00045-of-00172.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.input_layernorm.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00045-of-00172.safetensors", + 
"model.layers.16.mlp.experts.3.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": 
"model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00045-of-00172.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00046-of-00172.safetensors", + 
"model.layers.16.mlp.experts.18.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.23.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": 
"model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.30.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00046-of-00172.safetensors", + 
"model.layers.16.mlp.experts.33.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00046-of-00172.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": 
"model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.45.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00047-of-00172.safetensors", + 
"model.layers.16.mlp.experts.48.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.52.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": 
"model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00047-of-00172.safetensors", + 
"model.layers.16.mlp.experts.63.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.64.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.64.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.64.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.65.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.65.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.65.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.66.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.66.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.66.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.67.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.67.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.67.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.68.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.68.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.68.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.69.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.69.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.69.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.70.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.70.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.70.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.71.down_proj.weight": 
"model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.71.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.71.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.72.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.72.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.72.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.73.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.73.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.73.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.74.down_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.74.gate_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.74.up_proj.weight": "model-00047-of-00172.safetensors", + "model.layers.16.mlp.experts.75.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.75.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.75.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.76.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.76.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.76.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.77.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.77.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.77.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.78.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.78.gate_proj.weight": "model-00048-of-00172.safetensors", + 
"model.layers.16.mlp.experts.78.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.79.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.79.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.79.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.80.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.80.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.80.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.81.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.81.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.81.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.82.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.82.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.82.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.83.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.83.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.83.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.84.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.84.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.84.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.85.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.85.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.85.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.86.down_proj.weight": 
"model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.86.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.86.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.87.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.87.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.87.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.88.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.88.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.88.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.89.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.89.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.89.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.90.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.90.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.90.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.91.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.91.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.91.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.92.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.92.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.92.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.93.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.93.gate_proj.weight": "model-00048-of-00172.safetensors", + 
"model.layers.16.mlp.experts.93.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.94.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.94.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.94.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.95.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.95.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.95.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.96.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.96.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.96.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.97.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.97.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.97.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.98.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.98.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.98.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.99.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.99.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.99.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.100.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.100.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.100.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.101.down_proj.weight": 
"model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.101.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.101.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.102.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.102.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.102.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.103.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.103.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.103.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.104.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.104.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.104.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.105.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.105.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.105.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.106.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.106.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.106.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.107.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.107.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.107.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.108.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.108.gate_proj.weight": 
"model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.108.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.109.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.109.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.109.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.110.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.110.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.110.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.111.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.111.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.111.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.112.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.112.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.112.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.113.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.113.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.113.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.114.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.114.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.114.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.115.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.115.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.115.up_proj.weight": "model-00048-of-00172.safetensors", 
+ "model.layers.16.mlp.experts.116.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.116.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.116.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.117.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.117.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.117.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.118.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.118.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.118.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.119.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.119.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.119.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.120.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.120.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.120.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.121.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.121.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.121.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.122.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.122.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.122.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.123.down_proj.weight": "model-00048-of-00172.safetensors", + 
"model.layers.16.mlp.experts.123.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.123.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.124.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.124.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.124.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.125.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.125.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.125.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.126.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.126.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.126.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.127.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.127.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.experts.127.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.gate.weight": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.gate.weight_1": "model-00048-of-00172.safetensors", + "model.layers.16.mlp.moe_statics.e_score_correction_bias": "model-00048-of-00172.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00048-of-00172.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.input_layernorm.weight": "model-00048-of-00172.safetensors", + 
"model.layers.17.mlp.experts.0.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00048-of-00172.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": 
"model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00049-of-00172.safetensors", + 
"model.layers.17.mlp.experts.15.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": 
"model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00049-of-00172.safetensors", + 
"model.layers.17.mlp.experts.30.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00049-of-00172.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": 
"model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00050-of-00172.safetensors", + 
"model.layers.17.mlp.experts.45.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": 
"model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00050-of-00172.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00051-of-00172.safetensors", + 
"model.layers.17.mlp.experts.60.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.64.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.64.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.64.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.65.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.65.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.65.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.66.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.66.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.66.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.67.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.67.gate_proj.weight": 
"model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.67.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.68.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.68.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.68.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.69.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.69.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.69.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.70.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.70.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.70.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.71.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.71.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.71.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.72.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.72.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.72.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.73.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.73.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.73.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.74.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.74.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.74.up_proj.weight": "model-00051-of-00172.safetensors", + 
"model.layers.17.mlp.experts.75.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.75.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.75.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.76.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.76.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.76.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.77.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.77.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.77.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.78.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.78.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.78.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.79.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.79.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.79.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.80.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.80.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.80.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.81.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.81.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.81.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.82.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.82.gate_proj.weight": 
"model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.82.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.83.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.83.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.83.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.84.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.84.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.84.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.85.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.85.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.85.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.86.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.86.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.86.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.87.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.87.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.87.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.88.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.88.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.88.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.89.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.89.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.89.up_proj.weight": "model-00051-of-00172.safetensors", + 
"model.layers.17.mlp.experts.90.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.90.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.90.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.91.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.91.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.91.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.92.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.92.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.92.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.93.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.93.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.93.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.94.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.94.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.94.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.95.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.95.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.95.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.96.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.96.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.96.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.97.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.97.gate_proj.weight": 
"model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.97.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.98.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.98.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.98.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.99.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.99.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.99.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.100.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.100.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.100.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.101.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.101.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.101.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.102.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.102.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.102.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.103.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.103.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.103.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.104.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.104.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.104.up_proj.weight": "model-00051-of-00172.safetensors", + 
"model.layers.17.mlp.experts.105.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.105.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.105.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.106.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.106.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.106.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.107.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.107.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.107.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.108.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.108.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.108.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.109.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.109.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.109.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.110.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.110.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.110.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.111.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.111.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.111.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.112.down_proj.weight": "model-00051-of-00172.safetensors", + 
"model.layers.17.mlp.experts.112.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.112.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.113.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.113.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.113.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.114.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.114.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.114.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.115.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.115.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.115.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.116.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.116.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.116.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.117.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.117.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.117.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.118.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.118.gate_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.118.up_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.119.down_proj.weight": "model-00051-of-00172.safetensors", + "model.layers.17.mlp.experts.119.gate_proj.weight": "model-00052-of-00172.safetensors", + 
"model.layers.17.mlp.experts.119.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.120.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.120.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.120.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.121.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.121.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.121.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.122.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.122.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.122.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.123.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.123.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.123.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.124.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.124.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.124.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.125.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.125.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.125.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.126.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.126.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.126.up_proj.weight": "model-00052-of-00172.safetensors", + 
"model.layers.17.mlp.experts.127.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.127.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.experts.127.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.gate.weight": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.gate.weight_1": "model-00052-of-00172.safetensors", + "model.layers.17.mlp.moe_statics.e_score_correction_bias": "model-00052-of-00172.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00052-of-00172.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.input_layernorm.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00052-of-00172.safetensors", + 
"model.layers.18.mlp.experts.3.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": 
"model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.16.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00052-of-00172.safetensors", + 
"model.layers.18.mlp.experts.18.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00052-of-00172.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.23.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": 
"model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00053-of-00172.safetensors", + 
"model.layers.18.mlp.experts.33.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.38.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": 
"model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.45.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00053-of-00172.safetensors", + 
"model.layers.18.mlp.experts.48.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00053-of-00172.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": 
"model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.60.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00054-of-00172.safetensors", + 
"model.layers.18.mlp.experts.63.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.64.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.64.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.64.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.65.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.65.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.65.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.66.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.66.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.66.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.67.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.67.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.67.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.68.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.68.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.68.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.69.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.69.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.69.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.70.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.70.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.70.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.71.down_proj.weight": 
"model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.71.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.71.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.72.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.72.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.72.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.73.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.73.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.73.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.74.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.74.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.74.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.75.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.75.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.75.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.76.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.76.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.76.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.77.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.77.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.77.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.78.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.78.gate_proj.weight": "model-00054-of-00172.safetensors", + 
"model.layers.18.mlp.experts.78.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.79.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.79.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.79.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.80.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.80.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.80.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.81.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.81.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.81.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.82.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.82.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.82.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.83.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.83.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.83.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.84.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.84.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.84.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.85.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.85.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.85.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.86.down_proj.weight": 
"model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.86.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.86.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.87.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.87.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.87.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.88.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.88.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.88.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.89.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.89.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.89.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.90.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.90.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.90.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.91.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.91.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.91.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.92.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.92.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.92.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.93.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.93.gate_proj.weight": "model-00054-of-00172.safetensors", + 
"model.layers.18.mlp.experts.93.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.94.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.94.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.94.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.95.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.95.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.95.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.96.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.96.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.96.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.97.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.97.gate_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.97.up_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.98.down_proj.weight": "model-00054-of-00172.safetensors", + "model.layers.18.mlp.experts.98.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.98.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.99.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.99.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.99.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.100.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.100.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.100.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.101.down_proj.weight": 
"model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.101.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.101.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.102.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.102.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.102.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.103.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.103.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.103.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.104.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.104.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.104.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.105.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.105.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.105.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.106.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.106.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.106.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.107.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.107.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.107.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.108.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.108.gate_proj.weight": 
"model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.108.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.109.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.109.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.109.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.110.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.110.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.110.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.111.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.111.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.111.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.112.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.112.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.112.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.113.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.113.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.113.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.114.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.114.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.114.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.115.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.115.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.115.up_proj.weight": "model-00055-of-00172.safetensors", 
+ "model.layers.18.mlp.experts.116.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.116.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.116.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.117.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.117.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.117.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.118.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.118.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.118.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.119.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.119.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.119.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.120.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.120.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.120.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.121.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.121.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.121.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.122.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.122.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.122.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.123.down_proj.weight": "model-00055-of-00172.safetensors", + 
"model.layers.18.mlp.experts.123.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.123.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.124.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.124.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.124.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.125.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.125.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.125.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.126.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.126.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.126.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.127.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.127.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.experts.127.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.gate.weight": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.gate.weight_1": "model-00055-of-00172.safetensors", + "model.layers.18.mlp.moe_statics.e_score_correction_bias": "model-00055-of-00172.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00055-of-00172.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.input_layernorm.weight": "model-00055-of-00172.safetensors", + 
"model.layers.19.mlp.experts.0.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.5.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": 
"model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.12.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00055-of-00172.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00056-of-00172.safetensors", + 
"model.layers.19.mlp.experts.15.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": 
"model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.27.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00056-of-00172.safetensors", + 
"model.layers.19.mlp.experts.30.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.34.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": 
"model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00056-of-00172.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00057-of-00172.safetensors", + 
"model.layers.19.mlp.experts.45.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.49.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": 
"model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.56.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00057-of-00172.safetensors", + 
"model.layers.19.mlp.experts.60.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.64.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.64.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.64.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.65.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.65.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.65.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.66.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.66.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.66.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.67.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.67.gate_proj.weight": 
"model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.67.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.68.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.68.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.69.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.69.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.69.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.70.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.70.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.70.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.71.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.71.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.71.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.72.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.72.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.72.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.73.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.73.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.73.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.74.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.74.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.74.up_proj.weight": "model-00057-of-00172.safetensors", + 
"model.layers.19.mlp.experts.75.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.75.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.75.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.76.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.76.gate_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.76.up_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.77.down_proj.weight": "model-00057-of-00172.safetensors", + "model.layers.19.mlp.experts.77.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.77.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.78.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.78.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.78.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.79.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.79.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.79.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.80.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.80.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.80.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.81.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.81.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.81.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.82.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.82.gate_proj.weight": 
"model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.82.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.83.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.83.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.83.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.84.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.84.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.84.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.85.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.85.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.85.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.86.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.86.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.86.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.87.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.87.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.87.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.88.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.88.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.88.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.89.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.89.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.89.up_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.19.mlp.experts.90.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.90.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.90.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.91.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.91.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.91.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.92.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.92.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.92.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.93.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.93.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.93.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.94.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.94.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.94.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.95.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.95.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.95.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.96.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.96.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.96.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.97.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.97.gate_proj.weight": 
"model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.97.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.98.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.98.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.98.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.99.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.99.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.99.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.100.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.100.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.100.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.101.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.101.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.101.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.102.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.102.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.102.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.103.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.103.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.103.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.104.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.104.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.104.up_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.19.mlp.experts.105.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.105.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.105.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.106.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.106.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.106.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.107.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.107.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.107.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.108.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.108.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.108.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.109.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.109.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.109.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.110.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.110.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.110.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.111.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.111.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.111.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.112.down_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.19.mlp.experts.112.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.112.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.113.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.113.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.113.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.114.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.114.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.114.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.115.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.115.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.115.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.116.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.116.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.116.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.117.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.117.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.117.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.118.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.118.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.118.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.119.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.119.gate_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.19.mlp.experts.119.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.120.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.120.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.120.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.121.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.121.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.121.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.122.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.122.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.122.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.123.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.123.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.123.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.124.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.124.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.124.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.125.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.125.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.125.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.126.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.126.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.126.up_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.19.mlp.experts.127.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.127.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.experts.127.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.gate.weight": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.gate.weight_1": "model-00058-of-00172.safetensors", + "model.layers.19.mlp.moe_statics.e_score_correction_bias": "model-00058-of-00172.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00058-of-00172.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.input_layernorm.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.1.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00058-of-00172.safetensors", + 
"model.layers.20.mlp.experts.3.up_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00058-of-00172.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": 
"model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.16.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00059-of-00172.safetensors", + 
"model.layers.20.mlp.experts.18.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": 
"model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.31.up_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00059-of-00172.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00060-of-00172.safetensors", + 
"model.layers.20.mlp.experts.33.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.38.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": 
"model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00060-of-00172.safetensors", + 
"model.layers.20.mlp.experts.48.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.53.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": 
"model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.60.down_proj.weight": "model-00060-of-00172.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00061-of-00172.safetensors", + 
"model.layers.20.mlp.experts.63.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.64.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.64.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.64.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.65.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.65.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.65.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.66.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.66.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.66.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.67.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.67.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.67.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.68.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.68.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.68.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.69.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.69.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.69.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.70.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.70.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.70.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.71.down_proj.weight": 
"model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.71.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.71.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.72.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.72.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.72.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.73.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.73.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.73.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.74.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.74.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.74.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.75.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.75.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.75.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.76.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.76.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.76.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.77.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.77.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.77.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.78.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.78.gate_proj.weight": "model-00061-of-00172.safetensors", + 
"model.layers.20.mlp.experts.78.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.79.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.79.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.79.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.80.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.80.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.80.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.81.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.81.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.81.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.82.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.82.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.82.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.83.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.83.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.83.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.84.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.84.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.84.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.85.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.85.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.85.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.86.down_proj.weight": 
"model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.86.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.86.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.87.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.87.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.87.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.88.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.88.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.88.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.89.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.89.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.89.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.90.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.90.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.90.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.91.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.91.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.91.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.92.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.92.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.92.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.93.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.93.gate_proj.weight": "model-00061-of-00172.safetensors", + 
"model.layers.20.mlp.experts.93.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.94.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.94.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.94.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.95.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.95.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.95.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.96.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.96.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.96.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.97.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.97.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.97.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.98.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.98.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.98.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.99.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.99.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.99.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.100.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.100.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.100.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.101.down_proj.weight": 
"model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.101.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.101.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.102.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.102.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.102.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.103.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.103.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.103.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.104.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.104.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.104.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.105.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.105.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.105.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.106.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.106.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.106.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.107.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.107.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.107.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.108.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.108.gate_proj.weight": 
"model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.108.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.109.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.109.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.109.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.110.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.110.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.110.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.111.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.111.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.111.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.112.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.112.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.112.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.113.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.113.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.113.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.114.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.114.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.114.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.115.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.115.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.115.up_proj.weight": "model-00061-of-00172.safetensors", 
+ "model.layers.20.mlp.experts.116.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.116.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.116.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.117.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.117.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.117.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.118.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.118.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.118.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.119.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.119.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.119.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.120.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.120.gate_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.120.up_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.121.down_proj.weight": "model-00061-of-00172.safetensors", + "model.layers.20.mlp.experts.121.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.121.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.122.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.122.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.122.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.123.down_proj.weight": "model-00062-of-00172.safetensors", + 
"model.layers.20.mlp.experts.123.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.123.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.124.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.124.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.124.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.125.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.125.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.125.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.126.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.126.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.126.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.127.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.127.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.experts.127.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.gate.weight": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.gate.weight_1": "model-00062-of-00172.safetensors", + "model.layers.20.mlp.moe_statics.e_score_correction_bias": "model-00062-of-00172.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00062-of-00172.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.input_layernorm.weight": "model-00062-of-00172.safetensors", + 
"model.layers.21.mlp.experts.0.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.5.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": 
"model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00062-of-00172.safetensors", + 
"model.layers.21.mlp.experts.15.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.20.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": 
"model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00062-of-00172.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.27.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00063-of-00172.safetensors", + 
"model.layers.21.mlp.experts.30.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": 
"model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.42.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00063-of-00172.safetensors", + 
"model.layers.21.mlp.experts.45.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.49.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00063-of-00172.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": 
"model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00064-of-00172.safetensors", + 
"model.layers.21.mlp.experts.60.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.64.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.64.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.64.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.65.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.65.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.65.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.66.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.66.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.66.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.67.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.67.gate_proj.weight": 
"model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.67.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.68.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.68.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.68.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.69.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.69.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.69.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.70.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.70.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.70.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.71.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.71.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.71.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.72.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.72.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.72.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.73.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.73.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.73.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.74.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.74.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.74.up_proj.weight": "model-00064-of-00172.safetensors", + 
"model.layers.21.mlp.experts.75.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.75.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.75.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.76.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.76.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.76.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.77.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.77.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.77.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.78.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.78.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.78.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.79.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.79.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.79.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.80.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.80.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.80.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.81.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.81.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.81.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.82.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.82.gate_proj.weight": 
"model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.82.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.83.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.83.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.83.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.84.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.84.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.84.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.85.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.85.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.85.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.86.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.86.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.86.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.87.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.87.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.87.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.88.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.88.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.88.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.89.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.89.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.89.up_proj.weight": "model-00064-of-00172.safetensors", + 
"model.layers.21.mlp.experts.90.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.90.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.90.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.91.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.91.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.91.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.92.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.92.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.92.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.93.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.93.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.93.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.94.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.94.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.94.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.95.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.95.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.95.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.96.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.96.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.96.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.97.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.97.gate_proj.weight": 
"model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.97.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.98.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.98.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.98.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.99.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.99.gate_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.99.up_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.100.down_proj.weight": "model-00064-of-00172.safetensors", + "model.layers.21.mlp.experts.100.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.100.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.101.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.101.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.101.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.102.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.102.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.102.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.103.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.103.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.103.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.104.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.104.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.104.up_proj.weight": "model-00065-of-00172.safetensors", + 
"model.layers.21.mlp.experts.105.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.105.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.105.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.106.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.106.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.106.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.107.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.107.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.107.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.108.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.108.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.108.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.109.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.109.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.109.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.110.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.110.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.110.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.111.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.111.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.111.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.112.down_proj.weight": "model-00065-of-00172.safetensors", + 
"model.layers.21.mlp.experts.112.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.112.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.113.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.113.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.113.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.114.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.114.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.114.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.115.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.115.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.115.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.116.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.116.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.116.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.117.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.117.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.117.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.118.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.118.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.118.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.119.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.119.gate_proj.weight": "model-00065-of-00172.safetensors", + 
"model.layers.21.mlp.experts.119.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.120.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.120.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.120.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.121.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.121.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.121.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.122.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.122.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.122.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.123.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.123.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.123.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.124.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.124.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.124.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.125.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.125.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.125.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.126.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.126.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.126.up_proj.weight": "model-00065-of-00172.safetensors", + 
"model.layers.21.mlp.experts.127.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.127.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.experts.127.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.gate.weight": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.gate.weight_1": "model-00065-of-00172.safetensors", + "model.layers.21.mlp.moe_statics.e_score_correction_bias": "model-00065-of-00172.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00065-of-00172.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.input_layernorm.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00065-of-00172.safetensors", + 
"model.layers.22.mlp.experts.3.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": 
"model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00065-of-00172.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00066-of-00172.safetensors", + 
"model.layers.22.mlp.experts.18.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": 
"model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00066-of-00172.safetensors", + 
"model.layers.22.mlp.experts.33.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": 
"model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00066-of-00172.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00067-of-00172.safetensors", + 
"model.layers.22.mlp.experts.48.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": 
"model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00067-of-00172.safetensors", + 
"model.layers.22.mlp.experts.63.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.64.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.64.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.64.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.65.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.65.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.65.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.66.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.66.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.66.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.67.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.67.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.67.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.68.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.68.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.68.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.69.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.69.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.69.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.70.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.70.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.70.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.71.down_proj.weight": 
"model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.71.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.71.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.72.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.72.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.72.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.73.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.73.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.73.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.74.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.74.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.74.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.75.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.75.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.75.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.76.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.76.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.76.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.77.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.77.gate_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.77.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.78.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.78.gate_proj.weight": "model-00067-of-00172.safetensors", + 
"model.layers.22.mlp.experts.78.up_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.79.down_proj.weight": "model-00067-of-00172.safetensors", + "model.layers.22.mlp.experts.79.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.79.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.80.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.80.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.80.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.81.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.81.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.81.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.82.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.82.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.82.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.83.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.83.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.83.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.84.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.84.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.84.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.85.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.85.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.85.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.86.down_proj.weight": 
"model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.86.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.86.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.87.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.87.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.87.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.88.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.88.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.88.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.89.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.89.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.89.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.90.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.90.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.90.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.91.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.91.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.91.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.92.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.92.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.92.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.93.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.93.gate_proj.weight": "model-00068-of-00172.safetensors", + 
"model.layers.22.mlp.experts.93.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.94.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.94.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.94.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.95.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.95.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.95.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.96.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.96.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.96.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.97.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.97.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.97.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.98.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.98.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.98.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.99.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.99.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.99.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.100.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.100.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.100.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.101.down_proj.weight": 
"model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.101.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.101.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.102.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.102.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.102.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.103.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.103.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.103.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.104.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.104.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.104.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.105.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.105.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.105.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.106.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.106.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.106.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.107.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.107.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.107.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.108.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.108.gate_proj.weight": 
"model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.108.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.109.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.109.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.109.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.110.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.110.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.110.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.111.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.111.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.111.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.112.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.112.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.112.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.113.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.113.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.113.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.114.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.114.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.114.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.115.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.115.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.115.up_proj.weight": "model-00068-of-00172.safetensors", 
+ "model.layers.22.mlp.experts.116.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.116.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.116.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.117.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.117.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.117.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.118.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.118.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.118.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.119.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.119.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.119.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.120.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.120.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.120.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.121.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.121.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.121.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.122.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.122.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.122.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.123.down_proj.weight": "model-00068-of-00172.safetensors", + 
"model.layers.22.mlp.experts.123.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.123.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.124.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.124.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.124.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.125.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.125.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.125.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.126.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.126.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.126.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.127.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.127.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.experts.127.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.gate.weight": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.gate.weight_1": "model-00068-of-00172.safetensors", + "model.layers.22.mlp.moe_statics.e_score_correction_bias": "model-00068-of-00172.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00068-of-00172.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.input_layernorm.weight": "model-00068-of-00172.safetensors", + 
"model.layers.23.mlp.experts.0.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00068-of-00172.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": 
"model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.13.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00069-of-00172.safetensors", + 
"model.layers.23.mlp.experts.15.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.20.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": 
"model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00069-of-00172.safetensors", + 
"model.layers.23.mlp.experts.30.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00069-of-00172.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.35.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": 
"model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.42.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00070-of-00172.safetensors", + 
"model.layers.23.mlp.experts.45.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": 
"model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.57.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00070-of-00172.safetensors", + 
"model.layers.23.mlp.experts.60.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00070-of-00172.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.64.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.64.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.64.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.65.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.65.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.65.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.66.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.66.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.66.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.67.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.67.gate_proj.weight": 
"model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.67.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.68.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.68.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.68.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.69.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.69.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.69.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.70.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.70.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.71.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.71.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.72.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.72.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.72.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.73.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.73.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.73.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.74.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.74.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.74.up_proj.weight": "model-00071-of-00172.safetensors", + 
"model.layers.23.mlp.experts.75.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.75.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.75.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.76.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.76.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.76.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.77.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.77.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.77.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.78.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.78.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.78.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.79.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.79.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.79.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.80.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.80.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.80.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.81.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.81.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.81.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.82.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.82.gate_proj.weight": 
"model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.82.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.83.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.83.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.83.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.84.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.84.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.84.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.85.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.85.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.85.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.86.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.86.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.86.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.87.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.87.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.87.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.88.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.88.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.88.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.89.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.89.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.89.up_proj.weight": "model-00071-of-00172.safetensors", + 
"model.layers.23.mlp.experts.90.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.90.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.90.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.91.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.91.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.91.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.92.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.92.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.92.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.93.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.93.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.93.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.94.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.94.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.94.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.95.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.95.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.95.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.96.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.96.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.96.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.97.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.97.gate_proj.weight": 
"model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.97.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.98.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.98.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.98.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.99.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.99.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.99.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.100.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.100.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.100.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.101.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.101.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.101.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.102.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.102.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.102.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.103.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.103.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.103.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.104.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.104.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.104.up_proj.weight": "model-00071-of-00172.safetensors", + 
"model.layers.23.mlp.experts.105.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.105.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.105.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.106.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.106.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.106.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.107.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.107.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.107.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.108.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.108.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.108.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.109.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.109.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.109.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.110.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.110.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.110.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.111.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.111.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.111.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.112.down_proj.weight": "model-00071-of-00172.safetensors", + 
"model.layers.23.mlp.experts.112.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.112.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.113.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.113.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.113.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.114.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.114.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.114.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.115.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.115.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.115.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.116.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.116.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.116.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.117.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.117.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.117.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.118.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.118.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.118.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.119.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.119.gate_proj.weight": "model-00071-of-00172.safetensors", + 
"model.layers.23.mlp.experts.119.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.120.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.120.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.120.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.121.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.121.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.121.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.122.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.122.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.122.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.123.down_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.123.gate_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.123.up_proj.weight": "model-00071-of-00172.safetensors", + "model.layers.23.mlp.experts.124.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.124.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.124.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.125.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.125.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.125.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.126.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.126.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.126.up_proj.weight": "model-00072-of-00172.safetensors", + 
"model.layers.23.mlp.experts.127.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.127.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.experts.127.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.gate.weight": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.gate.weight_1": "model-00072-of-00172.safetensors", + "model.layers.23.mlp.moe_statics.e_score_correction_bias": "model-00072-of-00172.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00072-of-00172.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.input_layernorm.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.2.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00072-of-00172.safetensors", + 
"model.layers.24.mlp.experts.3.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": 
"model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00072-of-00172.safetensors", + 
"model.layers.24.mlp.experts.18.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00072-of-00172.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": 
"model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00073-of-00172.safetensors", + 
"model.layers.24.mlp.experts.33.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": 
"model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00073-of-00172.safetensors", + 
"model.layers.24.mlp.experts.48.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00073-of-00172.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": 
"model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00074-of-00172.safetensors", + 
"model.layers.24.mlp.experts.63.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.64.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.64.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.64.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.65.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.65.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.65.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.66.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.66.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.66.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.67.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.67.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.67.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.68.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.68.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.68.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.69.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.69.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.69.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.70.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.70.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.70.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.71.down_proj.weight": 
"model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.71.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.71.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.72.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.72.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.72.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.73.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.73.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.73.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.74.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.74.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.74.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.75.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.75.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.75.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.76.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.76.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.76.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.77.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.77.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.77.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.78.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.78.gate_proj.weight": "model-00074-of-00172.safetensors", + 
"model.layers.24.mlp.experts.78.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.79.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.79.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.79.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.80.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.80.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.80.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.81.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.81.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.81.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.82.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.82.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.82.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.83.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.83.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.83.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.84.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.84.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.84.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.85.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.85.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.85.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.86.down_proj.weight": 
"model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.86.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.86.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.87.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.87.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.87.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.88.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.88.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.88.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.89.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.89.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.89.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.90.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.90.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.90.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.91.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.91.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.91.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.92.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.92.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.92.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.93.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.93.gate_proj.weight": "model-00074-of-00172.safetensors", + 
"model.layers.24.mlp.experts.93.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.94.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.94.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.94.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.95.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.95.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.95.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.96.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.96.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.96.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.97.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.97.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.97.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.98.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.98.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.98.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.99.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.99.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.99.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.100.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.100.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.100.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.101.down_proj.weight": 
"model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.101.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.101.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.102.down_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.102.gate_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.102.up_proj.weight": "model-00074-of-00172.safetensors", + "model.layers.24.mlp.experts.103.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.103.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.103.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.104.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.104.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.104.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.105.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.105.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.105.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.106.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.106.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.106.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.107.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.107.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.107.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.108.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.108.gate_proj.weight": 
"model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.108.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.109.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.109.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.109.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.110.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.110.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.110.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.111.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.111.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.111.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.112.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.112.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.112.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.113.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.113.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.113.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.114.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.114.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.114.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.115.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.115.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.115.up_proj.weight": "model-00075-of-00172.safetensors", 
+ "model.layers.24.mlp.experts.116.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.116.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.116.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.117.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.117.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.117.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.118.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.118.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.118.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.119.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.119.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.119.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.120.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.120.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.120.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.121.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.121.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.121.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.122.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.122.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.122.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.123.down_proj.weight": "model-00075-of-00172.safetensors", + 
"model.layers.24.mlp.experts.123.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.123.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.124.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.124.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.124.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.125.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.125.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.125.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.126.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.126.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.126.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.127.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.127.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.experts.127.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.gate.weight": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.gate.weight_1": "model-00075-of-00172.safetensors", + "model.layers.24.mlp.moe_statics.e_score_correction_bias": "model-00075-of-00172.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00075-of-00172.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.input_layernorm.weight": "model-00075-of-00172.safetensors", + 
"model.layers.25.mlp.experts.0.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.6.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": 
"model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.13.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00075-of-00172.safetensors", + 
"model.layers.25.mlp.experts.15.down_proj.weight": "model-00075-of-00172.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": 
"model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.28.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00076-of-00172.safetensors", + 
"model.layers.25.mlp.experts.30.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.35.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": 
"model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00076-of-00172.safetensors", + "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00077-of-00172.safetensors", + 
"model.layers.25.mlp.experts.45.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.50.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": 
"model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.57.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00077-of-00172.safetensors", + 
"model.layers.25.mlp.experts.60.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.64.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.64.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.64.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.65.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.65.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.65.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.66.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.66.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.66.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.67.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.67.gate_proj.weight": 
"model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.67.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.68.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.68.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.68.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.69.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.69.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.69.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.70.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.70.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.70.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.71.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.71.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.71.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.72.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.72.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.72.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.73.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.73.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.73.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.74.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.74.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.74.up_proj.weight": "model-00077-of-00172.safetensors", + 
"model.layers.25.mlp.experts.75.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.75.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.75.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.76.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.76.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.76.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.77.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.77.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.77.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.78.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.78.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.78.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.79.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.79.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.79.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.80.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.80.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.80.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.81.down_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.81.gate_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.81.up_proj.weight": "model-00077-of-00172.safetensors", + "model.layers.25.mlp.experts.82.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.82.gate_proj.weight": 
"model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.82.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.83.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.83.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.83.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.84.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.84.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.84.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.85.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.85.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.85.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.86.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.86.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.86.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.87.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.87.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.87.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.88.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.88.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.88.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.89.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.89.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.89.up_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.25.mlp.experts.90.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.90.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.90.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.91.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.91.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.91.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.92.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.92.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.92.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.93.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.93.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.93.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.94.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.94.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.94.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.95.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.95.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.95.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.96.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.96.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.96.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.97.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.97.gate_proj.weight": 
"model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.97.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.98.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.98.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.98.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.99.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.99.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.99.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.100.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.100.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.100.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.101.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.101.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.101.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.102.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.102.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.102.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.103.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.103.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.103.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.104.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.104.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.104.up_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.25.mlp.experts.105.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.105.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.105.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.106.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.106.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.106.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.107.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.107.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.107.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.108.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.108.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.108.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.109.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.109.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.109.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.110.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.110.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.110.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.111.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.111.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.111.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.112.down_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.25.mlp.experts.112.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.112.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.113.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.113.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.113.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.114.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.114.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.114.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.115.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.115.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.115.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.116.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.116.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.116.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.117.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.117.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.117.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.118.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.118.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.118.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.119.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.119.gate_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.25.mlp.experts.119.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.120.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.120.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.120.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.121.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.121.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.121.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.122.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.122.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.122.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.123.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.123.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.123.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.124.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.124.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.124.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.125.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.125.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.125.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.126.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.126.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.126.up_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.25.mlp.experts.127.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.127.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.experts.127.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.gate.weight": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.gate.weight_1": "model-00078-of-00172.safetensors", + "model.layers.25.mlp.moe_statics.e_score_correction_bias": "model-00078-of-00172.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00078-of-00172.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.input_layernorm.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.2.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00078-of-00172.safetensors", + 
"model.layers.26.mlp.experts.3.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00078-of-00172.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": 
"model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00079-of-00172.safetensors", + 
"model.layers.26.mlp.experts.18.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": 
"model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00079-of-00172.safetensors", + 
"model.layers.26.mlp.experts.33.up_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00079-of-00172.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": 
"model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00080-of-00172.safetensors", + 
"model.layers.26.mlp.experts.48.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": 
"model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00080-of-00172.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00081-of-00172.safetensors", + 
"model.layers.26.mlp.experts.63.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.64.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.64.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.64.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.65.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.65.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.65.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.66.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.66.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.66.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.67.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.67.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.67.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.68.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.68.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.68.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.69.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.69.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.69.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.70.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.70.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.70.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.71.down_proj.weight": 
"model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.71.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.71.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.72.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.72.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.72.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.73.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.73.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.73.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.74.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.74.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.74.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.75.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.75.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.75.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.76.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.76.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.76.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.77.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.77.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.77.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.78.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.78.gate_proj.weight": "model-00081-of-00172.safetensors", + 
"model.layers.26.mlp.experts.78.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.79.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.79.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.79.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.80.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.80.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.80.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.81.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.81.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.81.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.82.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.82.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.82.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.83.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.83.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.83.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.84.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.84.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.84.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.85.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.85.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.85.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.86.down_proj.weight": 
"model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.86.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.86.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.87.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.87.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.87.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.88.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.88.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.88.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.89.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.89.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.89.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.90.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.90.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.90.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.91.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.91.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.91.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.92.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.92.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.92.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.93.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.93.gate_proj.weight": "model-00081-of-00172.safetensors", + 
"model.layers.26.mlp.experts.93.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.94.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.94.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.94.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.95.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.95.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.95.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.96.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.96.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.96.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.97.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.97.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.97.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.98.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.98.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.98.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.99.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.99.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.99.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.100.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.100.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.100.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.101.down_proj.weight": 
"model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.101.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.101.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.102.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.102.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.102.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.103.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.103.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.103.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.104.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.104.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.104.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.105.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.105.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.105.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.106.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.106.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.106.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.107.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.107.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.107.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.108.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.108.gate_proj.weight": 
"model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.108.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.109.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.109.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.109.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.110.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.110.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.110.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.111.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.111.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.111.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.112.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.112.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.112.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.113.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.113.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.113.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.114.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.114.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.114.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.115.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.115.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.115.up_proj.weight": "model-00081-of-00172.safetensors", 
+ "model.layers.26.mlp.experts.116.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.116.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.116.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.117.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.117.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.117.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.118.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.118.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.118.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.119.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.119.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.119.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.120.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.120.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.120.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.121.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.121.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.121.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.122.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.122.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.122.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.123.down_proj.weight": "model-00081-of-00172.safetensors", + 
"model.layers.26.mlp.experts.123.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.123.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.124.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.124.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.124.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.125.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.125.gate_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.125.up_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.126.down_proj.weight": "model-00081-of-00172.safetensors", + "model.layers.26.mlp.experts.126.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.experts.126.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.experts.127.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.experts.127.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.experts.127.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.gate.weight": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.gate.weight_1": "model-00082-of-00172.safetensors", + "model.layers.26.mlp.moe_statics.e_score_correction_bias": "model-00082-of-00172.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00082-of-00172.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.input_layernorm.weight": "model-00082-of-00172.safetensors", + 
"model.layers.27.mlp.experts.0.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.6.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": 
"model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00082-of-00172.safetensors", + 
"model.layers.27.mlp.experts.15.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.21.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": 
"model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00082-of-00172.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.28.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00083-of-00172.safetensors", + 
"model.layers.27.mlp.experts.30.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": 
"model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.43.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00083-of-00172.safetensors", + 
"model.layers.27.mlp.experts.45.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.50.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": 
"model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00083-of-00172.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00084-of-00172.safetensors", + 
"model.layers.27.mlp.experts.60.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.64.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.64.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.64.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.65.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.65.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.65.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.66.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.66.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.66.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.67.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.67.gate_proj.weight": 
"model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.67.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.68.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.68.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.68.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.69.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.69.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.69.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.70.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.70.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.70.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.71.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.71.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.72.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.72.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.73.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.73.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.73.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.74.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.74.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.74.up_proj.weight": "model-00084-of-00172.safetensors", + 
"model.layers.27.mlp.experts.75.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.75.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.75.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.76.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.76.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.76.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.77.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.77.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.77.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.78.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.78.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.78.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.79.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.79.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.79.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.80.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.80.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.80.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.81.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.81.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.81.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.82.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.82.gate_proj.weight": 
"model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.82.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.83.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.83.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.83.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.84.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.84.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.84.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.85.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.85.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.85.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.86.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.86.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.86.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.87.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.87.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.87.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.88.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.88.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.88.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.89.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.89.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.89.up_proj.weight": "model-00084-of-00172.safetensors", + 
"model.layers.27.mlp.experts.90.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.90.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.90.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.91.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.91.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.91.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.92.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.92.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.92.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.93.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.93.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.93.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.94.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.94.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.94.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.95.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.95.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.95.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.96.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.96.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.96.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.97.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.97.gate_proj.weight": 
"model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.97.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.98.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.98.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.98.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.99.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.99.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.99.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.100.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.100.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.100.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.101.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.101.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.101.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.102.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.102.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.102.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.103.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.103.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.103.up_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.104.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.104.gate_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.104.up_proj.weight": "model-00084-of-00172.safetensors", + 
"model.layers.27.mlp.experts.105.down_proj.weight": "model-00084-of-00172.safetensors", + "model.layers.27.mlp.experts.105.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.105.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.106.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.106.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.106.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.107.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.107.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.107.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.108.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.108.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.108.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.109.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.109.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.109.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.110.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.110.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.110.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.111.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.111.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.111.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.112.down_proj.weight": "model-00085-of-00172.safetensors", + 
"model.layers.27.mlp.experts.112.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.112.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.113.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.113.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.113.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.114.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.114.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.114.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.115.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.115.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.115.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.116.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.116.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.116.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.117.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.117.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.117.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.118.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.118.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.118.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.119.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.119.gate_proj.weight": "model-00085-of-00172.safetensors", + 
"model.layers.27.mlp.experts.119.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.120.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.120.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.120.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.121.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.121.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.121.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.122.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.122.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.122.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.123.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.123.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.123.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.124.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.124.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.124.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.125.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.125.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.125.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.126.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.126.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.126.up_proj.weight": "model-00085-of-00172.safetensors", + 
"model.layers.27.mlp.experts.127.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.127.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.experts.127.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.gate.weight": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.gate.weight_1": "model-00085-of-00172.safetensors", + "model.layers.27.mlp.moe_statics.e_score_correction_bias": "model-00085-of-00172.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00085-of-00172.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.input_layernorm.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.0.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.0.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.1.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.1.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.2.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.2.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.3.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00085-of-00172.safetensors", + 
"model.layers.28.mlp.experts.3.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.4.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.4.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.5.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.5.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.6.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.6.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.7.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.7.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.8.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.8.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.9.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.9.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.10.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.10.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.11.down_proj.weight": 
"model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.11.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.12.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.12.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.13.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.13.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.14.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.14.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.15.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.15.up_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.16.down_proj.weight": "model-00085-of-00172.safetensors", + "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.16.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.17.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.17.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.18.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00086-of-00172.safetensors", + 
"model.layers.28.mlp.experts.18.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.19.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.19.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.20.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.20.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.21.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.21.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.22.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.22.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.23.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.23.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.24.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.24.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.25.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.25.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.25.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.26.down_proj.weight": 
"model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.26.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.27.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.27.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.28.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.28.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.29.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.29.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.30.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.30.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.31.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.31.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.32.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.32.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.33.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00086-of-00172.safetensors", + 
"model.layers.28.mlp.experts.33.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.34.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.34.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.35.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.35.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.36.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.36.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.37.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.37.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.38.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.38.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.39.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.39.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.40.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.40.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.41.down_proj.weight": 
"model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.41.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.42.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.42.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.43.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.43.up_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.44.down_proj.weight": "model-00086-of-00172.safetensors", + "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.44.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.45.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.45.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.46.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.46.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.47.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.47.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.47.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.48.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00087-of-00172.safetensors", + 
"model.layers.28.mlp.experts.48.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.49.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.49.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.50.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.50.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.51.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.51.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.52.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.52.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.53.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.53.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.54.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.54.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.55.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.55.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.56.down_proj.weight": 
"model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.56.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.57.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.57.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.58.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.58.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.59.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.59.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.60.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.60.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.61.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.61.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.62.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.62.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.63.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00087-of-00172.safetensors", + 
"model.layers.28.mlp.experts.63.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.64.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.64.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.64.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.65.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.65.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.65.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.66.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.66.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.66.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.67.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.67.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.67.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.68.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.68.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.68.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.69.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.69.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.69.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.70.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.70.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.70.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.71.down_proj.weight": 
"model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.71.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.71.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.72.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.72.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.72.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.73.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.73.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.73.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.74.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.74.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.74.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.75.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.75.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.75.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.76.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.76.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.76.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.77.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.77.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.77.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.78.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.78.gate_proj.weight": "model-00087-of-00172.safetensors", + 
"model.layers.28.mlp.experts.78.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.79.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.79.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.79.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.80.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.80.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.80.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.81.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.81.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.81.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.82.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.82.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.82.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.83.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.83.gate_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.83.up_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.84.down_proj.weight": "model-00087-of-00172.safetensors", + "model.layers.28.mlp.experts.84.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.84.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.85.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.85.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.85.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.86.down_proj.weight": 
"model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.86.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.86.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.87.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.87.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.87.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.88.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.88.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.88.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.89.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.89.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.89.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.90.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.90.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.90.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.91.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.91.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.91.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.92.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.92.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.92.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.93.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.93.gate_proj.weight": "model-00088-of-00172.safetensors", + 
"model.layers.28.mlp.experts.93.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.94.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.94.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.94.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.95.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.95.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.95.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.96.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.96.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.96.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.97.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.97.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.97.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.98.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.98.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.98.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.99.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.99.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.99.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.100.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.100.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.100.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.101.down_proj.weight": 
"model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.101.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.101.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.102.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.102.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.102.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.103.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.103.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.103.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.104.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.104.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.104.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.105.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.105.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.105.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.106.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.106.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.106.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.107.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.107.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.107.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.108.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.108.gate_proj.weight": 
"model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.108.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.109.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.109.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.109.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.110.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.110.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.110.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.111.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.111.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.111.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.112.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.112.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.112.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.113.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.113.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.113.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.114.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.114.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.114.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.115.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.115.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.115.up_proj.weight": "model-00088-of-00172.safetensors", 
+ "model.layers.28.mlp.experts.116.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.116.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.116.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.117.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.117.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.117.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.118.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.118.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.118.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.119.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.119.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.119.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.120.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.120.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.120.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.121.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.121.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.121.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.122.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.122.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.122.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.123.down_proj.weight": "model-00088-of-00172.safetensors", + 
"model.layers.28.mlp.experts.123.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.123.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.124.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.124.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.124.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.125.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.125.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.125.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.126.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.126.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.126.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.127.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.127.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.experts.127.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.gate.weight": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.gate.weight_1": "model-00088-of-00172.safetensors", + "model.layers.28.mlp.moe_statics.e_score_correction_bias": "model-00088-of-00172.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00088-of-00172.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.input_layernorm.weight": "model-00088-of-00172.safetensors", + 
"model.layers.29.mlp.experts.0.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.0.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.1.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.1.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.2.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.2.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.3.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.3.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.4.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.4.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.5.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.5.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.6.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.6.up_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.7.down_proj.weight": "model-00088-of-00172.safetensors", + "model.layers.29.mlp.experts.7.gate_proj.weight": 
"model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.7.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.8.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.8.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.9.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.9.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.10.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.10.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.11.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.11.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.12.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.12.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.13.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.13.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.14.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.14.up_proj.weight": "model-00089-of-00172.safetensors", + 
"model.layers.29.mlp.experts.15.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.15.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.16.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.16.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.17.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.17.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.18.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.18.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.19.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.19.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.20.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.20.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.21.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.21.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.22.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.22.gate_proj.weight": 
"model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.22.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.23.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.23.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.24.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.24.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.25.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.25.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.26.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.26.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.27.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.27.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.28.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.28.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.29.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.29.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.29.up_proj.weight": "model-00089-of-00172.safetensors", + 
"model.layers.29.mlp.experts.30.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.30.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.31.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.31.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.32.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.32.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.33.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.33.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.34.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.34.up_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.35.down_proj.weight": "model-00089-of-00172.safetensors", + "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.35.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.36.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.36.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.37.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.37.gate_proj.weight": 
"model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.37.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.38.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.38.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.39.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.39.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.40.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.40.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.41.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.41.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.42.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.42.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.43.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.43.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.44.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.44.up_proj.weight": "model-00090-of-00172.safetensors", + 
"model.layers.29.mlp.experts.45.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.45.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.46.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.46.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.47.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.47.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.48.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.48.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.49.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.49.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.50.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.50.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.51.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.51.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.51.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.52.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.52.gate_proj.weight": 
"model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.52.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.53.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.53.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.54.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.54.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.55.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.55.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.56.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.56.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.57.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.57.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.58.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.58.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.59.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.59.up_proj.weight": "model-00090-of-00172.safetensors", + 
"model.layers.29.mlp.experts.60.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.60.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.61.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.61.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.62.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.62.up_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.63.down_proj.weight": "model-00090-of-00172.safetensors", + "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.63.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.64.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.64.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.64.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.65.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.65.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.65.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.66.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.66.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.66.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.67.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.67.gate_proj.weight": 
"model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.67.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.68.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.68.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.68.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.69.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.69.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.69.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.70.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.70.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.70.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.71.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.71.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.71.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.72.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.72.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.72.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.73.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.73.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.73.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.74.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.74.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.74.up_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.75.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.75.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.75.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.76.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.76.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.76.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.77.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.77.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.77.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.78.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.78.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.78.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.79.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.79.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.79.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.80.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.80.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.80.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.81.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.81.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.81.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.82.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.82.gate_proj.weight": 
"model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.82.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.83.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.83.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.83.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.84.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.84.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.84.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.85.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.85.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.85.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.86.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.86.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.86.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.87.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.87.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.87.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.88.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.88.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.88.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.89.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.89.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.89.up_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.90.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.90.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.90.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.91.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.91.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.91.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.92.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.92.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.92.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.93.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.93.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.93.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.94.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.94.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.94.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.95.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.95.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.95.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.96.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.96.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.96.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.97.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.97.gate_proj.weight": 
"model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.97.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.98.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.98.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.98.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.99.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.99.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.99.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.100.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.100.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.100.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.101.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.101.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.101.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.102.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.102.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.102.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.103.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.103.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.103.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.104.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.104.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.104.up_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.105.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.105.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.105.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.106.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.106.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.106.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.107.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.107.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.107.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.108.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.108.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.108.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.109.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.109.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.109.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.110.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.110.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.110.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.111.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.111.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.111.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.112.down_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.112.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.112.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.113.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.113.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.113.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.114.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.114.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.114.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.115.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.115.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.115.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.116.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.116.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.116.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.117.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.117.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.117.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.118.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.118.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.118.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.119.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.119.gate_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.119.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.120.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.120.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.120.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.121.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.121.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.121.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.122.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.122.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.122.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.123.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.123.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.123.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.124.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.124.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.124.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.125.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.125.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.125.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.126.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.126.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.126.up_proj.weight": "model-00091-of-00172.safetensors", + 
"model.layers.29.mlp.experts.127.down_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.127.gate_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.experts.127.up_proj.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.gate.weight": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.gate.weight_1": "model-00091-of-00172.safetensors", + "model.layers.29.mlp.moe_statics.e_score_correction_bias": "model-00091-of-00172.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00091-of-00172.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.input_layernorm.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.0.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.0.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.1.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.1.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.2.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.2.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.3.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00092-of-00172.safetensors", + 
"model.layers.30.mlp.experts.3.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.4.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.4.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.5.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.5.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.6.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.6.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.7.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.7.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.8.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.8.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.9.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.9.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.10.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.10.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.11.down_proj.weight": 
"model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.11.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.12.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.12.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.13.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.13.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.14.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.14.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.15.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.15.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.16.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.16.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.17.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.17.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.18.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.18.gate_proj.weight": "model-00092-of-00172.safetensors", + 
"model.layers.30.mlp.experts.18.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.19.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.19.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.20.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.20.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.21.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.21.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.22.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.22.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.23.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.23.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.24.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.24.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.25.down_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.25.up_proj.weight": "model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.26.down_proj.weight": 
"model-00092-of-00172.safetensors", + "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.26.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.27.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.27.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.28.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.28.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.29.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.29.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.30.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.30.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.31.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.31.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.32.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.32.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.33.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00093-of-00172.safetensors", + 
"model.layers.30.mlp.experts.33.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.34.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.34.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.35.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.35.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.36.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.36.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.37.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.37.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.38.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.38.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.39.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.39.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.40.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.40.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.40.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.41.down_proj.weight": 
"model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.41.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.42.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.42.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.43.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.43.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.44.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.44.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.45.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.45.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.46.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.46.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.47.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.47.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.48.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00093-of-00172.safetensors", + 
"model.layers.30.mlp.experts.48.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.49.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.49.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.50.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.50.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.51.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.51.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.52.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.52.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.53.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.53.up_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.54.down_proj.weight": "model-00093-of-00172.safetensors", + "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.54.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.55.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.55.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.56.down_proj.weight": 
"model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.56.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.57.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.57.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.58.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.58.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.59.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.59.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.60.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.60.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.61.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.61.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.62.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.62.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.62.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.63.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00094-of-00172.safetensors", + 
"model.layers.30.mlp.experts.63.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.64.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.64.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.64.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.65.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.65.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.65.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.66.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.66.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.66.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.67.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.67.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.67.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.68.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.68.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.68.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.69.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.69.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.69.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.70.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.70.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.70.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.71.down_proj.weight": 
"model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.71.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.71.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.72.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.72.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.72.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.73.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.73.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.73.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.74.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.74.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.74.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.75.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.75.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.75.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.76.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.76.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.76.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.77.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.77.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.77.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.78.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.78.gate_proj.weight": "model-00094-of-00172.safetensors", + 
"model.layers.30.mlp.experts.78.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.79.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.79.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.79.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.80.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.80.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.80.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.81.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.81.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.81.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.82.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.82.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.82.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.83.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.83.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.83.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.84.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.84.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.84.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.85.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.85.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.85.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.86.down_proj.weight": 
"model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.86.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.86.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.87.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.87.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.87.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.88.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.88.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.88.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.89.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.89.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.89.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.90.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.90.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.90.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.91.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.91.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.91.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.92.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.92.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.92.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.93.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.93.gate_proj.weight": "model-00094-of-00172.safetensors", + 
"model.layers.30.mlp.experts.93.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.94.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.94.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.94.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.95.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.95.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.95.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.96.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.96.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.96.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.97.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.97.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.97.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.98.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.98.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.98.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.99.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.99.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.99.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.100.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.100.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.100.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.101.down_proj.weight": 
"model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.101.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.101.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.102.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.102.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.102.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.103.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.103.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.103.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.104.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.104.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.104.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.105.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.105.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.105.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.106.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.106.gate_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.106.up_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.107.down_proj.weight": "model-00094-of-00172.safetensors", + "model.layers.30.mlp.experts.107.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.107.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.108.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.108.gate_proj.weight": 
"model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.108.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.109.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.109.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.109.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.110.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.110.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.110.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.111.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.111.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.111.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.112.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.112.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.112.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.113.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.113.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.113.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.114.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.114.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.114.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.115.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.115.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.115.up_proj.weight": "model-00095-of-00172.safetensors", 
+ "model.layers.30.mlp.experts.116.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.116.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.116.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.117.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.117.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.117.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.118.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.118.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.118.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.119.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.119.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.119.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.120.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.120.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.120.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.121.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.121.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.121.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.122.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.122.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.122.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.123.down_proj.weight": "model-00095-of-00172.safetensors", + 
"model.layers.30.mlp.experts.123.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.123.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.124.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.124.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.124.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.125.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.125.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.125.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.126.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.126.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.126.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.127.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.127.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.experts.127.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.gate.weight": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.gate.weight_1": "model-00095-of-00172.safetensors", + "model.layers.30.mlp.moe_statics.e_score_correction_bias": "model-00095-of-00172.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00095-of-00172.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.input_layernorm.weight": "model-00095-of-00172.safetensors", + 
"model.layers.31.mlp.experts.0.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.0.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.1.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.1.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.2.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.2.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.3.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.3.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.4.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.4.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.5.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.5.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.6.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.6.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.7.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.7.gate_proj.weight": 
"model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.7.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.8.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.8.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.9.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.9.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.10.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.10.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.11.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.11.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.12.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.12.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.13.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.13.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.14.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.14.up_proj.weight": "model-00095-of-00172.safetensors", + 
"model.layers.31.mlp.experts.15.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.15.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.16.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.16.up_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.17.down_proj.weight": "model-00095-of-00172.safetensors", + "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.17.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.18.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.18.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.19.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.19.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.20.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.20.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.21.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.21.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.22.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.22.gate_proj.weight": 
"model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.22.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.23.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.23.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.24.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.24.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.25.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.25.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.26.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.26.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.27.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.27.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.28.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.28.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.29.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.29.up_proj.weight": "model-00096-of-00172.safetensors", + 
"model.layers.31.mlp.experts.30.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.30.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.31.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.31.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.32.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.32.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.33.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.33.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.34.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.34.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.35.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.35.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.36.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.36.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.37.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.37.gate_proj.weight": 
"model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.37.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.38.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.38.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.39.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.39.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.40.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.40.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.41.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.41.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.42.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.42.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.43.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.43.up_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.44.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.44.gate_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.44.up_proj.weight": "model-00096-of-00172.safetensors", + 
"model.layers.31.mlp.experts.45.down_proj.weight": "model-00096-of-00172.safetensors", + "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.45.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.46.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.46.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.47.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.47.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.48.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.48.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.49.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.49.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.50.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.50.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.51.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.51.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.52.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.52.gate_proj.weight": 
"model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.52.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.53.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.53.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.54.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.54.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.55.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.55.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.56.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.56.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.57.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.57.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.58.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.58.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.59.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.59.up_proj.weight": "model-00097-of-00172.safetensors", + 
"model.layers.31.mlp.experts.60.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.60.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.61.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.61.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.62.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.62.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.63.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.63.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.64.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.64.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.64.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.65.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.65.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.65.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.66.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.66.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.66.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.67.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.67.gate_proj.weight": 
"model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.67.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.68.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.68.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.68.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.69.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.69.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.69.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.70.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.70.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.70.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.71.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.71.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.72.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.72.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.73.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.73.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.73.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.74.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.74.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.74.up_proj.weight": "model-00097-of-00172.safetensors", + 
"model.layers.31.mlp.experts.75.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.75.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.75.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.76.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.76.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.76.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.77.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.77.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.77.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.78.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.78.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.78.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.79.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.79.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.79.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.80.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.80.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.80.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.81.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.81.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.81.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.82.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.82.gate_proj.weight": 
"model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.82.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.83.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.83.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.83.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.84.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.84.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.84.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.85.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.85.gate_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.85.up_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.86.down_proj.weight": "model-00097-of-00172.safetensors", + "model.layers.31.mlp.experts.86.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.86.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.87.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.87.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.87.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.88.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.88.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.88.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.89.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.89.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.89.up_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.31.mlp.experts.90.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.90.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.90.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.91.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.91.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.91.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.92.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.92.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.92.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.93.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.93.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.93.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.94.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.94.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.94.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.95.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.95.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.95.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.96.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.96.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.96.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.97.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.97.gate_proj.weight": 
"model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.97.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.98.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.98.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.98.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.99.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.99.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.99.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.100.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.100.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.100.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.101.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.101.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.101.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.102.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.102.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.102.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.103.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.103.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.103.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.104.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.104.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.104.up_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.31.mlp.experts.105.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.105.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.105.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.106.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.106.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.106.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.107.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.107.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.107.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.108.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.108.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.108.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.109.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.109.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.109.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.110.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.110.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.110.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.111.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.111.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.111.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.112.down_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.31.mlp.experts.112.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.112.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.113.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.113.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.113.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.114.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.114.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.114.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.115.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.115.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.115.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.116.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.116.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.116.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.117.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.117.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.117.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.118.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.118.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.118.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.119.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.119.gate_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.31.mlp.experts.119.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.120.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.120.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.120.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.121.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.121.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.121.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.122.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.122.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.122.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.123.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.123.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.123.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.124.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.124.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.124.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.125.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.125.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.125.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.126.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.126.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.126.up_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.31.mlp.experts.127.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.127.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.experts.127.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.gate.weight": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.gate.weight_1": "model-00098-of-00172.safetensors", + "model.layers.31.mlp.moe_statics.e_score_correction_bias": "model-00098-of-00172.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00098-of-00172.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.input_layernorm.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.0.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.0.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.1.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.1.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.2.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.2.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.3.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00098-of-00172.safetensors", + 
"model.layers.32.mlp.experts.3.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.4.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.4.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.5.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.5.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.6.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.6.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.7.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.7.up_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.8.down_proj.weight": "model-00098-of-00172.safetensors", + "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.8.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.9.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.9.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.10.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.10.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.11.down_proj.weight": 
"model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.11.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.12.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.12.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.13.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.13.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.14.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.14.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.15.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.15.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.16.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.16.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.17.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.17.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.18.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00099-of-00172.safetensors", + 
"model.layers.32.mlp.experts.18.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.19.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.19.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.20.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.20.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.21.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.21.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.22.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.22.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.23.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.23.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.24.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.24.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.25.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.25.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.26.down_proj.weight": 
"model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.26.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.27.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.27.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.28.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.28.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.29.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.29.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.30.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.30.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.31.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.31.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.32.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.32.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.33.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.33.gate_proj.weight": "model-00099-of-00172.safetensors", + 
"model.layers.32.mlp.experts.33.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.34.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.34.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.35.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.35.up_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.36.down_proj.weight": "model-00099-of-00172.safetensors", + "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.36.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.37.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.37.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.38.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.38.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.39.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.39.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.40.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.40.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.41.down_proj.weight": 
"model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.41.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.42.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.42.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.43.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.43.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.44.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.44.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.45.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.45.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.46.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.46.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.47.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.47.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.48.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00100-of-00172.safetensors", + 
"model.layers.32.mlp.experts.48.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.49.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.49.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.50.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.50.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.51.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.51.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.52.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.52.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.53.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.53.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.54.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.54.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.55.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.55.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.55.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.56.down_proj.weight": 
"model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.56.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.57.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.57.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.58.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.58.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.59.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.59.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.60.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.60.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.61.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.61.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.62.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.62.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.63.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00100-of-00172.safetensors", + 
"model.layers.32.mlp.experts.63.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.64.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.64.gate_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.64.up_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.65.down_proj.weight": "model-00100-of-00172.safetensors", + "model.layers.32.mlp.experts.65.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.65.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.66.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.66.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.66.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.67.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.67.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.67.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.68.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.68.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.68.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.69.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.69.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.69.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.70.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.70.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.70.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.71.down_proj.weight": 
"model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.71.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.71.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.72.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.72.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.72.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.73.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.73.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.73.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.74.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.74.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.74.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.75.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.75.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.75.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.76.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.76.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.76.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.77.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.77.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.77.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.78.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.78.gate_proj.weight": "model-00101-of-00172.safetensors", + 
"model.layers.32.mlp.experts.78.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.79.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.79.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.79.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.80.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.80.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.80.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.81.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.81.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.81.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.82.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.82.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.82.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.83.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.83.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.83.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.84.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.84.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.84.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.85.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.85.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.85.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.86.down_proj.weight": 
"model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.86.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.86.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.87.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.87.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.87.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.88.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.88.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.88.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.89.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.89.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.89.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.90.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.90.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.90.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.91.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.91.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.91.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.92.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.92.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.92.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.93.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.93.gate_proj.weight": "model-00101-of-00172.safetensors", + 
"model.layers.32.mlp.experts.93.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.94.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.94.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.94.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.95.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.95.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.95.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.96.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.96.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.96.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.97.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.97.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.97.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.98.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.98.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.98.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.99.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.99.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.99.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.100.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.100.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.100.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.101.down_proj.weight": 
"model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.101.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.101.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.102.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.102.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.102.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.103.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.103.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.103.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.104.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.104.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.104.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.105.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.105.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.105.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.106.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.106.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.106.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.107.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.107.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.107.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.108.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.108.gate_proj.weight": 
"model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.108.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.109.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.109.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.109.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.110.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.110.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.110.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.111.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.111.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.111.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.112.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.112.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.112.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.113.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.113.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.113.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.114.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.114.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.114.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.115.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.115.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.115.up_proj.weight": "model-00101-of-00172.safetensors", 
+ "model.layers.32.mlp.experts.116.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.116.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.116.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.117.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.117.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.117.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.118.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.118.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.118.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.119.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.119.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.119.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.120.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.120.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.120.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.121.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.121.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.121.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.122.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.122.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.122.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.123.down_proj.weight": "model-00101-of-00172.safetensors", + 
"model.layers.32.mlp.experts.123.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.123.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.124.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.124.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.124.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.125.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.125.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.125.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.126.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.126.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.126.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.127.down_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.127.gate_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.experts.127.up_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.gate.weight": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.gate.weight_1": "model-00101-of-00172.safetensors", + "model.layers.32.mlp.moe_statics.e_score_correction_bias": "model-00101-of-00172.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00101-of-00172.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00101-of-00172.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.input_layernorm.weight": "model-00102-of-00172.safetensors", + 
"model.layers.33.mlp.experts.0.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.0.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.1.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.1.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.2.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.2.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.3.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.3.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.4.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.4.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.5.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.5.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.6.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.6.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.7.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.7.gate_proj.weight": 
"model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.7.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.8.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.8.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.9.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.9.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.10.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.10.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.11.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.11.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.12.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.12.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.13.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.13.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.14.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.14.up_proj.weight": "model-00102-of-00172.safetensors", + 
"model.layers.33.mlp.experts.15.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.15.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.16.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.16.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.17.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.17.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.18.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.18.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.19.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.19.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.20.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.20.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.21.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.21.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.22.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.22.gate_proj.weight": 
"model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.22.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.23.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.23.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.24.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.24.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.25.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.25.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.26.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.26.up_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.27.down_proj.weight": "model-00102-of-00172.safetensors", + "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.27.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.28.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.28.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.29.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.29.up_proj.weight": "model-00103-of-00172.safetensors", + 
"model.layers.33.mlp.experts.30.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.30.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.31.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.31.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.32.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.32.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.33.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.33.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.34.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.34.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.35.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.35.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.36.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.36.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.37.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.37.gate_proj.weight": 
"model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.37.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.38.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.38.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.39.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.39.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.40.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.40.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.41.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.41.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.42.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.42.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.43.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.43.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.44.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.44.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.44.up_proj.weight": "model-00103-of-00172.safetensors", + 
"model.layers.33.mlp.experts.45.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.45.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.46.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.46.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.47.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.47.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.48.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.48.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.49.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.49.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.50.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.50.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.51.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.51.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.52.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.52.gate_proj.weight": 
"model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.52.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.53.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.53.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.54.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.54.up_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.55.down_proj.weight": "model-00103-of-00172.safetensors", + "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.55.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.56.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.56.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.57.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.57.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.58.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.58.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.59.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.59.up_proj.weight": "model-00104-of-00172.safetensors", + 
"model.layers.33.mlp.experts.60.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.60.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.61.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.61.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.62.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.62.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.63.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.63.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.64.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.64.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.64.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.65.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.65.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.65.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.66.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.66.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.66.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.67.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.67.gate_proj.weight": 
"model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.67.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.68.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.68.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.68.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.69.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.69.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.69.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.70.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.70.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.70.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.71.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.71.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.71.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.72.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.72.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.72.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.73.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.73.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.73.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.74.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.74.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.74.up_proj.weight": "model-00104-of-00172.safetensors", + 
"model.layers.33.mlp.experts.75.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.75.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.75.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.76.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.76.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.76.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.77.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.77.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.77.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.78.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.78.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.78.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.79.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.79.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.79.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.80.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.80.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.80.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.81.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.81.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.81.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.82.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.82.gate_proj.weight": 
"model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.82.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.83.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.83.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.83.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.84.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.84.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.84.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.85.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.85.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.85.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.86.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.86.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.86.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.87.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.87.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.87.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.88.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.88.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.88.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.89.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.89.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.89.up_proj.weight": "model-00104-of-00172.safetensors", + 
"model.layers.33.mlp.experts.90.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.90.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.90.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.91.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.91.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.91.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.92.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.92.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.92.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.93.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.93.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.93.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.94.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.94.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.94.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.95.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.95.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.95.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.96.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.96.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.96.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.97.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.97.gate_proj.weight": 
"model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.97.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.98.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.98.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.98.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.99.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.99.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.99.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.100.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.100.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.100.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.101.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.101.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.101.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.102.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.102.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.102.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.103.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.103.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.103.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.104.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.104.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.104.up_proj.weight": "model-00104-of-00172.safetensors", + 
"model.layers.33.mlp.experts.105.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.105.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.105.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.106.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.106.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.106.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.107.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.107.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.107.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.108.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.108.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.108.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.109.down_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.109.gate_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.109.up_proj.weight": "model-00104-of-00172.safetensors", + "model.layers.33.mlp.experts.110.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.110.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.110.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.111.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.111.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.111.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.112.down_proj.weight": "model-00105-of-00172.safetensors", + 
"model.layers.33.mlp.experts.112.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.112.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.113.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.113.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.113.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.114.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.114.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.114.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.115.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.115.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.115.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.116.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.116.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.116.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.117.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.117.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.117.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.118.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.118.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.118.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.119.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.119.gate_proj.weight": "model-00105-of-00172.safetensors", + 
"model.layers.33.mlp.experts.119.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.120.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.120.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.120.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.121.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.121.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.121.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.122.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.122.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.122.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.123.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.123.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.123.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.124.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.124.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.124.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.125.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.125.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.125.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.126.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.126.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.126.up_proj.weight": "model-00105-of-00172.safetensors", + 
"model.layers.33.mlp.experts.127.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.127.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.experts.127.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.gate.weight": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.gate.weight_1": "model-00105-of-00172.safetensors", + "model.layers.33.mlp.moe_statics.e_score_correction_bias": "model-00105-of-00172.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00105-of-00172.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.input_layernorm.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.0.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.0.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.1.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.1.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.2.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.2.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.3.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00105-of-00172.safetensors", + 
"model.layers.34.mlp.experts.3.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.4.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.4.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.5.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.5.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.6.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.6.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.7.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.7.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.8.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.8.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.9.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.9.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.10.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.10.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.11.down_proj.weight": 
"model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.11.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.12.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.12.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.13.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.13.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.14.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.14.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.15.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.15.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.16.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.16.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.17.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.17.up_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.18.down_proj.weight": "model-00105-of-00172.safetensors", + "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00106-of-00172.safetensors", + 
"model.layers.34.mlp.experts.18.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.19.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.19.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.20.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.20.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.21.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.21.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.22.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.22.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.23.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.23.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.24.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.24.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.25.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.25.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.26.down_proj.weight": 
"model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.26.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.27.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.27.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.28.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.28.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.29.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.29.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.30.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.30.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.31.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.31.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.32.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.32.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.33.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.33.gate_proj.weight": "model-00106-of-00172.safetensors", + 
"model.layers.34.mlp.experts.33.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.34.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.34.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.35.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.35.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.36.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.36.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.37.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.37.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.38.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.38.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.39.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.39.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.40.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.40.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.41.down_proj.weight": 
"model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.41.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.42.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.42.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.43.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.43.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.44.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.44.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.45.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.45.up_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.46.down_proj.weight": "model-00106-of-00172.safetensors", + "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.46.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.47.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.47.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.48.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00107-of-00172.safetensors", + 
"model.layers.34.mlp.experts.48.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.49.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.49.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.50.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.50.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.51.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.51.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.52.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.52.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.53.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.53.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.54.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.54.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.55.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.55.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.55.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.56.down_proj.weight": 
"model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.56.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.57.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.57.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.58.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.58.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.59.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.59.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.60.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.60.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.61.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.61.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.62.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.62.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.63.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00107-of-00172.safetensors", + 
"model.layers.34.mlp.experts.63.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.64.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.64.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.64.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.65.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.65.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.65.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.66.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.66.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.66.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.67.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.67.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.67.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.68.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.68.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.68.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.69.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.69.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.69.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.70.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.70.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.70.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.71.down_proj.weight": 
"model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.71.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.71.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.72.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.72.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.72.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.73.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.73.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.73.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.74.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.74.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.74.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.75.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.75.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.75.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.76.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.76.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.76.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.77.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.77.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.77.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.78.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.78.gate_proj.weight": "model-00107-of-00172.safetensors", + 
"model.layers.34.mlp.experts.78.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.79.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.79.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.79.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.80.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.80.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.80.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.81.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.81.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.81.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.82.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.82.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.82.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.83.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.83.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.83.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.84.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.84.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.84.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.85.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.85.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.85.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.86.down_proj.weight": 
"model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.86.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.86.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.87.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.87.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.87.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.88.down_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.88.gate_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.88.up_proj.weight": "model-00107-of-00172.safetensors", + "model.layers.34.mlp.experts.89.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.89.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.89.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.90.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.90.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.90.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.91.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.91.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.91.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.92.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.92.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.92.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.93.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.93.gate_proj.weight": "model-00108-of-00172.safetensors", + 
"model.layers.34.mlp.experts.93.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.94.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.94.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.94.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.95.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.95.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.95.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.96.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.96.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.96.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.97.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.97.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.97.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.98.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.98.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.98.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.99.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.99.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.99.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.100.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.100.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.100.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.101.down_proj.weight": 
"model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.101.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.101.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.102.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.102.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.102.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.103.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.103.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.103.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.104.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.104.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.104.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.105.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.105.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.105.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.106.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.106.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.106.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.107.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.107.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.107.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.108.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.108.gate_proj.weight": 
"model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.108.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.109.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.109.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.109.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.110.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.110.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.110.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.111.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.111.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.111.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.112.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.112.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.112.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.113.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.113.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.113.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.114.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.114.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.114.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.115.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.115.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.115.up_proj.weight": "model-00108-of-00172.safetensors", 
+ "model.layers.34.mlp.experts.116.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.116.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.116.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.117.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.117.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.117.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.118.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.118.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.118.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.119.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.119.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.119.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.120.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.120.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.120.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.121.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.121.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.121.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.122.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.122.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.122.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.123.down_proj.weight": "model-00108-of-00172.safetensors", + 
"model.layers.34.mlp.experts.123.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.123.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.124.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.124.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.124.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.125.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.125.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.125.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.126.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.126.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.126.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.127.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.127.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.experts.127.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.gate.weight": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.gate.weight_1": "model-00108-of-00172.safetensors", + "model.layers.34.mlp.moe_statics.e_score_correction_bias": "model-00108-of-00172.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00108-of-00172.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.input_layernorm.weight": "model-00108-of-00172.safetensors", + 
"model.layers.35.mlp.experts.0.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.0.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.1.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.1.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.2.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.2.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.3.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.3.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.4.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.4.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.5.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.5.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.6.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.6.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.7.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.7.gate_proj.weight": 
"model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.7.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.8.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.8.up_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.9.down_proj.weight": "model-00108-of-00172.safetensors", + "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.9.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.10.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.10.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.11.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.11.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.12.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.12.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.13.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.13.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.14.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.14.up_proj.weight": "model-00109-of-00172.safetensors", + 
"model.layers.35.mlp.experts.15.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.15.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.16.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.16.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.17.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.17.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.18.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.18.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.19.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.19.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.20.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.20.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.21.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.21.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.22.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.22.gate_proj.weight": 
"model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.22.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.23.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.23.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.24.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.24.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.25.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.25.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.26.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.26.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.27.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.27.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.28.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.28.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.29.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.29.up_proj.weight": "model-00109-of-00172.safetensors", + 
"model.layers.35.mlp.experts.30.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.30.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.31.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.31.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.32.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.32.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.33.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.33.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.34.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.34.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.35.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.35.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.36.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.36.up_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.37.down_proj.weight": "model-00109-of-00172.safetensors", + "model.layers.35.mlp.experts.37.gate_proj.weight": 
"model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.37.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.38.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.38.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.39.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.39.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.40.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.40.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.41.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.41.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.42.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.42.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.43.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.43.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.44.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.44.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.44.up_proj.weight": "model-00110-of-00172.safetensors", + 
"model.layers.35.mlp.experts.45.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.45.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.46.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.46.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.47.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.47.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.48.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.48.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.49.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.49.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.50.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.50.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.51.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.51.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.52.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.52.gate_proj.weight": 
"model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.52.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.53.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.53.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.54.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.54.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.55.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.55.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.56.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.56.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.57.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.57.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.58.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.58.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.59.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.59.up_proj.weight": "model-00110-of-00172.safetensors", + 
"model.layers.35.mlp.experts.60.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.60.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.61.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.61.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.62.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.62.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.63.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.63.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.64.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.64.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.64.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.65.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.65.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.65.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.66.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.66.gate_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.66.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.67.down_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.67.gate_proj.weight": 
"model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.67.up_proj.weight": "model-00110-of-00172.safetensors", + "model.layers.35.mlp.experts.68.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.68.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.68.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.69.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.69.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.69.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.70.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.70.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.70.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.71.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.71.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.71.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.72.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.72.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.72.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.73.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.73.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.74.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.74.up_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.75.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.75.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.76.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.76.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.76.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.77.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.77.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.77.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.78.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.78.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.78.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.79.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.79.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.79.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.80.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.80.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.80.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.81.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.81.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.81.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.82.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.82.gate_proj.weight": 
"model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.82.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.83.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.83.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.83.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.84.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.84.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.84.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.85.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.85.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.85.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.86.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.86.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.86.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.87.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.87.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.87.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.88.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.88.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.88.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.89.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.89.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.89.up_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.90.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.90.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.90.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.91.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.91.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.91.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.92.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.92.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.92.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.93.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.93.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.93.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.94.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.94.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.94.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.95.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.95.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.95.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.96.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.96.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.96.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.97.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.97.gate_proj.weight": 
"model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.97.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.98.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.98.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.98.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.99.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.99.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.99.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.100.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.100.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.100.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.101.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.101.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.101.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.102.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.102.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.102.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.103.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.103.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.103.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.104.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.104.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.104.up_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.105.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.105.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.105.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.106.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.106.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.106.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.107.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.107.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.107.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.108.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.108.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.108.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.109.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.109.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.109.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.110.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.110.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.110.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.111.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.111.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.111.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.112.down_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.112.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.112.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.113.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.113.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.113.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.114.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.114.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.114.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.115.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.115.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.115.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.116.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.116.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.116.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.117.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.117.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.117.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.118.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.118.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.118.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.119.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.119.gate_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.119.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.120.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.120.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.120.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.121.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.121.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.121.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.122.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.122.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.122.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.123.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.123.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.123.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.124.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.124.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.124.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.125.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.125.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.125.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.126.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.126.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.126.up_proj.weight": "model-00111-of-00172.safetensors", + 
"model.layers.35.mlp.experts.127.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.127.gate_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.experts.127.up_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.gate.weight": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.gate.weight_1": "model-00111-of-00172.safetensors", + "model.layers.35.mlp.moe_statics.e_score_correction_bias": "model-00111-of-00172.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00111-of-00172.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.36.input_layernorm.weight": "model-00111-of-00172.safetensors", + "model.layers.36.mlp.experts.0.down_proj.weight": "model-00111-of-00172.safetensors", + "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.0.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.1.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.1.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.2.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.2.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.3.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00112-of-00172.safetensors", + 
"model.layers.36.mlp.experts.3.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.4.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.4.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.5.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.5.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.6.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.6.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.7.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.7.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.8.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.8.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.9.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.9.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.10.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.10.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.11.down_proj.weight": 
"model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.11.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.12.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.12.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.13.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.13.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.14.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.14.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.15.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.15.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.16.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.16.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.17.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.17.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.18.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00112-of-00172.safetensors", + 
"model.layers.36.mlp.experts.18.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.19.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.19.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.20.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.20.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.21.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.21.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.22.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.22.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.23.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.23.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.24.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.24.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.25.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.25.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.26.down_proj.weight": 
"model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.26.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.27.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.27.up_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.28.down_proj.weight": "model-00112-of-00172.safetensors", + "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.28.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.29.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.29.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.30.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.30.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.31.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.31.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.32.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.32.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.33.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.33.gate_proj.weight": "model-00113-of-00172.safetensors", + 
"model.layers.36.mlp.experts.33.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.34.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.34.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.35.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.35.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.36.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.36.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.37.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.37.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.38.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.38.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.39.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.39.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.40.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.40.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.41.down_proj.weight": 
"model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.41.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.42.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.42.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.43.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.43.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.44.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.44.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.45.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.45.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.46.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.46.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.47.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.47.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.48.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00113-of-00172.safetensors", + 
"model.layers.36.mlp.experts.48.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.49.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.49.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.50.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.50.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.51.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.51.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.52.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.52.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.53.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.53.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.54.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.54.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.55.down_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.55.gate_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.55.up_proj.weight": "model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.56.down_proj.weight": 
"model-00113-of-00172.safetensors", + "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.56.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.57.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.57.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.58.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.58.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.59.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.59.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.60.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.60.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.61.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.61.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.62.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.62.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.63.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00114-of-00172.safetensors", + 
"model.layers.36.mlp.experts.63.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.64.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.64.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.64.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.65.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.65.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.65.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.66.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.66.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.66.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.67.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.67.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.67.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.68.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.68.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.68.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.69.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.69.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.69.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.70.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.70.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.70.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.71.down_proj.weight": 
"model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.71.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.71.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.72.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.72.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.72.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.73.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.73.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.73.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.74.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.74.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.74.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.75.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.75.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.75.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.76.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.76.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.76.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.77.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.77.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.77.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.78.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.78.gate_proj.weight": "model-00114-of-00172.safetensors", + 
"model.layers.36.mlp.experts.78.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.79.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.79.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.79.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.80.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.80.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.80.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.81.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.81.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.81.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.82.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.82.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.82.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.83.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.83.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.83.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.84.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.84.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.84.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.85.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.85.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.85.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.86.down_proj.weight": 
"model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.86.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.86.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.87.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.87.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.87.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.88.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.88.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.88.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.89.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.89.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.89.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.90.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.90.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.90.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.91.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.91.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.91.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.92.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.92.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.92.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.93.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.93.gate_proj.weight": "model-00114-of-00172.safetensors", + 
"model.layers.36.mlp.experts.93.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.94.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.94.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.94.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.95.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.95.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.95.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.96.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.96.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.96.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.97.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.97.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.97.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.98.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.98.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.98.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.99.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.99.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.99.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.100.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.100.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.100.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.101.down_proj.weight": 
"model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.101.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.101.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.102.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.102.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.102.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.103.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.103.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.103.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.104.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.104.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.104.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.105.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.105.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.105.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.106.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.106.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.106.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.107.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.107.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.107.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.108.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.108.gate_proj.weight": 
"model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.108.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.109.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.109.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.109.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.110.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.110.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.110.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.111.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.111.gate_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.111.up_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.112.down_proj.weight": "model-00114-of-00172.safetensors", + "model.layers.36.mlp.experts.112.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.112.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.113.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.113.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.113.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.114.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.114.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.114.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.115.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.115.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.115.up_proj.weight": "model-00115-of-00172.safetensors", 
+ "model.layers.36.mlp.experts.116.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.116.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.116.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.117.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.117.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.117.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.118.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.118.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.118.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.119.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.119.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.119.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.120.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.120.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.120.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.121.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.121.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.121.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.122.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.122.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.122.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.123.down_proj.weight": "model-00115-of-00172.safetensors", + 
"model.layers.36.mlp.experts.123.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.123.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.124.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.124.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.124.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.125.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.125.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.125.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.126.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.126.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.126.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.127.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.127.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.experts.127.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.gate.weight": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.gate.weight_1": "model-00115-of-00172.safetensors", + "model.layers.36.mlp.moe_statics.e_score_correction_bias": "model-00115-of-00172.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00115-of-00172.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.input_layernorm.weight": "model-00115-of-00172.safetensors", + 
"model.layers.37.mlp.experts.0.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.0.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.1.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.1.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.2.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.2.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.3.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.3.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.4.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.4.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.5.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.5.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.6.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.6.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.7.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.7.gate_proj.weight": 
"model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.7.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.8.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.8.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.9.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.9.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.10.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.10.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.11.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.11.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.12.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.12.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.13.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.13.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.14.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.14.up_proj.weight": "model-00115-of-00172.safetensors", + 
"model.layers.37.mlp.experts.15.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.15.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.16.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.16.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.17.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.17.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.18.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.18.up_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.19.down_proj.weight": "model-00115-of-00172.safetensors", + "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.19.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.20.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.20.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.21.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.21.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.22.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.22.gate_proj.weight": 
"model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.22.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.23.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.23.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.24.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.24.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.25.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.25.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.26.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.26.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.27.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.27.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.28.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.28.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.29.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.29.up_proj.weight": "model-00116-of-00172.safetensors", + 
"model.layers.37.mlp.experts.30.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.30.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.31.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.31.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.32.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.32.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.33.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.33.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.34.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.34.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.35.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.35.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.36.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.36.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.37.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.37.gate_proj.weight": 
"model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.37.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.38.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.38.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.39.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.39.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.40.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.40.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.41.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.41.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.42.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.42.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.43.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.43.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.44.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.44.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.44.up_proj.weight": "model-00116-of-00172.safetensors", + 
"model.layers.37.mlp.experts.45.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.45.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.46.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.46.up_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.47.down_proj.weight": "model-00116-of-00172.safetensors", + "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.47.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.48.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.48.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.49.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.49.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.50.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.50.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.51.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.51.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.52.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.52.gate_proj.weight": 
"model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.52.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.53.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.53.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.54.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.54.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.55.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.55.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.56.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.56.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.57.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.57.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.58.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.58.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.59.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.59.up_proj.weight": "model-00117-of-00172.safetensors", + 
"model.layers.37.mlp.experts.60.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.60.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.61.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.61.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.62.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.62.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.63.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.63.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.64.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.64.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.64.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.65.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.65.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.65.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.66.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.66.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.66.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.67.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.67.gate_proj.weight": 
"model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.67.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.68.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.68.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.68.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.69.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.69.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.69.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.70.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.70.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.70.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.71.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.71.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.71.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.72.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.72.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.72.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.73.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.73.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.73.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.74.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.74.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.74.up_proj.weight": "model-00117-of-00172.safetensors", + 
"model.layers.37.mlp.experts.75.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.75.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.75.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.76.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.76.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.76.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.77.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.77.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.77.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.78.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.78.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.78.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.79.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.79.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.79.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.80.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.80.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.80.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.81.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.81.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.81.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.82.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.82.gate_proj.weight": 
"model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.82.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.83.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.83.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.83.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.84.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.84.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.84.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.85.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.85.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.85.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.86.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.86.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.86.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.87.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.87.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.87.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.88.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.88.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.88.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.89.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.89.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.89.up_proj.weight": "model-00117-of-00172.safetensors", + 
"model.layers.37.mlp.experts.90.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.90.gate_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.90.up_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.91.down_proj.weight": "model-00117-of-00172.safetensors", + "model.layers.37.mlp.experts.91.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.91.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.92.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.92.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.92.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.93.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.93.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.93.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.94.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.94.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.94.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.95.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.95.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.95.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.96.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.96.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.96.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.97.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.97.gate_proj.weight": 
"model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.97.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.98.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.98.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.98.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.99.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.99.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.99.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.100.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.100.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.100.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.101.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.101.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.101.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.102.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.102.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.102.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.103.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.103.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.103.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.104.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.104.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.104.up_proj.weight": "model-00118-of-00172.safetensors", + 
"model.layers.37.mlp.experts.105.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.105.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.105.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.106.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.106.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.106.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.107.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.107.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.107.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.108.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.108.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.108.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.109.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.109.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.109.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.110.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.110.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.110.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.111.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.111.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.111.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.112.down_proj.weight": "model-00118-of-00172.safetensors", + 
"model.layers.37.mlp.experts.112.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.112.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.113.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.113.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.113.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.114.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.114.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.114.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.115.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.115.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.115.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.116.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.116.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.116.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.117.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.117.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.117.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.118.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.118.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.118.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.119.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.119.gate_proj.weight": "model-00118-of-00172.safetensors", + 
"model.layers.37.mlp.experts.119.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.120.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.120.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.120.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.121.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.121.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.121.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.122.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.122.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.122.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.123.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.123.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.123.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.124.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.124.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.124.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.125.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.125.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.125.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.126.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.126.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.126.up_proj.weight": "model-00118-of-00172.safetensors", + 
"model.layers.37.mlp.experts.127.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.127.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.experts.127.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.gate.weight": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.gate.weight_1": "model-00118-of-00172.safetensors", + "model.layers.37.mlp.moe_statics.e_score_correction_bias": "model-00118-of-00172.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00118-of-00172.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.input_layernorm.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.0.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.0.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.1.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.1.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.2.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.2.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.3.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00118-of-00172.safetensors", + 
"model.layers.38.mlp.experts.3.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.4.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.4.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.5.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.5.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.6.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.6.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.7.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.7.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.8.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.8.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.9.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.9.up_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.10.down_proj.weight": "model-00118-of-00172.safetensors", + "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.10.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.11.down_proj.weight": 
"model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.11.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.12.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.12.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.13.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.13.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.14.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.14.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.15.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.15.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.16.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.16.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.17.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.17.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.18.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00119-of-00172.safetensors", + 
"model.layers.38.mlp.experts.18.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.19.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.19.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.20.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.20.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.21.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.21.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.22.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.22.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.23.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.23.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.24.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.24.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.25.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.25.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.26.down_proj.weight": 
"model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.26.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.27.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.27.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.28.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.28.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.29.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.29.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.30.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.30.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.31.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.31.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.32.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.32.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.33.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.33.gate_proj.weight": "model-00119-of-00172.safetensors", + 
"model.layers.38.mlp.experts.33.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.34.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.34.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.35.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.35.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.36.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.36.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.37.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.37.up_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.38.down_proj.weight": "model-00119-of-00172.safetensors", + "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.38.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.39.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.39.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.40.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.40.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.41.down_proj.weight": 
"model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.41.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.42.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.42.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.43.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.43.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.44.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.44.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.45.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.45.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.46.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.46.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.47.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.47.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.48.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00120-of-00172.safetensors", + 
"model.layers.38.mlp.experts.48.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.49.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.49.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.50.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.50.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.51.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.51.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.52.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.52.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.53.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.53.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.54.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.54.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.55.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.55.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.55.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.56.down_proj.weight": 
"model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.56.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.57.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.57.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.58.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.58.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.59.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.59.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.60.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.60.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.61.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.61.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.62.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.62.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.63.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00120-of-00172.safetensors", + 
"model.layers.38.mlp.experts.63.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.64.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.64.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.64.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.65.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.65.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.65.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.66.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.66.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.66.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.67.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.67.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.67.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.68.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.68.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.68.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.69.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.69.gate_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.69.up_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.70.down_proj.weight": "model-00120-of-00172.safetensors", + "model.layers.38.mlp.experts.70.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.70.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.71.down_proj.weight": 
"model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.71.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.71.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.72.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.72.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.72.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.73.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.73.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.73.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.74.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.74.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.74.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.75.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.75.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.75.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.76.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.76.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.76.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.77.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.77.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.77.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.78.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.78.gate_proj.weight": "model-00121-of-00172.safetensors", + 
"model.layers.38.mlp.experts.78.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.79.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.79.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.79.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.80.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.80.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.80.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.81.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.81.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.81.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.82.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.82.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.82.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.83.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.83.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.83.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.84.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.84.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.84.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.85.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.85.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.85.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.86.down_proj.weight": 
"model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.86.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.86.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.87.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.87.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.87.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.88.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.88.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.88.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.89.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.89.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.89.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.90.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.90.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.90.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.91.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.91.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.91.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.92.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.92.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.92.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.93.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.93.gate_proj.weight": "model-00121-of-00172.safetensors", + 
"model.layers.38.mlp.experts.93.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.94.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.94.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.94.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.95.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.95.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.95.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.96.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.96.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.96.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.97.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.97.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.97.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.98.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.98.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.98.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.99.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.99.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.99.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.100.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.100.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.100.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.101.down_proj.weight": 
"model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.101.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.101.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.102.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.102.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.102.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.103.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.103.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.103.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.104.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.104.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.104.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.105.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.105.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.105.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.106.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.106.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.106.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.107.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.107.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.107.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.108.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.108.gate_proj.weight": 
"model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.108.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.109.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.109.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.109.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.110.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.110.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.110.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.111.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.111.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.111.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.112.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.112.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.112.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.113.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.113.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.113.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.114.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.114.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.114.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.115.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.115.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.115.up_proj.weight": "model-00121-of-00172.safetensors", 
+ "model.layers.38.mlp.experts.116.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.116.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.116.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.117.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.117.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.117.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.118.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.118.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.118.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.119.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.119.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.119.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.120.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.120.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.120.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.121.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.121.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.121.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.122.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.122.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.122.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.123.down_proj.weight": "model-00121-of-00172.safetensors", + 
"model.layers.38.mlp.experts.123.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.123.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.124.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.124.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.124.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.125.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.125.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.125.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.126.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.126.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.126.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.127.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.127.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.experts.127.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.gate.weight": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.gate.weight_1": "model-00121-of-00172.safetensors", + "model.layers.38.mlp.moe_statics.e_score_correction_bias": "model-00121-of-00172.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00121-of-00172.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.39.input_layernorm.weight": "model-00121-of-00172.safetensors", + 
"model.layers.39.mlp.experts.0.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.39.mlp.experts.0.up_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.39.mlp.experts.1.down_proj.weight": "model-00121-of-00172.safetensors", + "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.1.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.2.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.2.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.3.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.3.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.4.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.4.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.5.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.5.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.6.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.6.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.7.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.7.gate_proj.weight": 
"model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.7.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.8.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.8.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.9.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.9.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.10.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.10.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.11.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.11.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.12.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.12.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.13.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.13.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.14.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.14.up_proj.weight": "model-00122-of-00172.safetensors", + 
"model.layers.39.mlp.experts.15.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.15.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.16.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.16.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.17.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.17.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.18.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.18.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.19.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.19.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.20.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.20.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.21.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.21.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.22.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.22.gate_proj.weight": 
"model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.22.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.23.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.23.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.24.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.24.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.25.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.25.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.26.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.26.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.27.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.27.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.28.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.28.up_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.29.down_proj.weight": "model-00122-of-00172.safetensors", + "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.29.up_proj.weight": "model-00123-of-00172.safetensors", + 
"model.layers.39.mlp.experts.30.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.30.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.31.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.31.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.32.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.32.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.33.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.33.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.34.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.34.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.35.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.35.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.36.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.36.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.37.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.37.gate_proj.weight": 
"model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.37.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.38.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.38.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.39.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.39.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.40.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.40.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.41.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.41.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.42.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.42.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.43.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.43.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.44.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.44.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.44.up_proj.weight": "model-00123-of-00172.safetensors", + 
"model.layers.39.mlp.experts.45.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.45.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.46.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.46.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.47.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.47.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.48.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.48.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.49.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.49.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.50.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.50.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.51.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.51.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.52.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.52.gate_proj.weight": 
"model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.52.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.53.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.53.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.54.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.54.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.55.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.55.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.56.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.56.up_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.57.down_proj.weight": "model-00123-of-00172.safetensors", + "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.57.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.58.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.58.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.59.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.59.up_proj.weight": "model-00124-of-00172.safetensors", + 
"model.layers.39.mlp.experts.60.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.60.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.61.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.61.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.62.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.62.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.63.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.63.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.64.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.64.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.64.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.65.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.65.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.65.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.66.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.66.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.66.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.67.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.67.gate_proj.weight": 
"model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.67.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.68.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.68.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.68.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.69.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.69.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.69.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.70.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.70.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.70.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.71.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.71.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.71.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.72.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.72.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.72.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.73.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.73.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.73.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.74.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.74.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.74.up_proj.weight": "model-00124-of-00172.safetensors", + 
"model.layers.39.mlp.experts.75.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.75.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.76.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.76.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.77.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.77.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.77.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.78.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.78.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.78.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.79.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.79.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.79.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.80.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.80.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.80.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.81.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.81.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.81.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.82.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.82.gate_proj.weight": 
"model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.82.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.83.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.83.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.83.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.84.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.84.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.84.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.85.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.85.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.85.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.86.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.86.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.86.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.87.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.87.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.87.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.88.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.88.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.88.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.89.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.89.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.89.up_proj.weight": "model-00124-of-00172.safetensors", + 
"model.layers.39.mlp.experts.90.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.90.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.90.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.91.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.91.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.91.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.92.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.92.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.92.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.93.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.93.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.93.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.94.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.94.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.94.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.95.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.95.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.95.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.96.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.96.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.96.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.97.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.97.gate_proj.weight": 
"model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.97.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.98.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.98.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.98.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.99.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.99.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.99.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.100.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.100.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.100.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.101.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.101.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.101.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.102.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.102.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.102.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.103.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.103.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.103.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.104.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.104.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.104.up_proj.weight": "model-00124-of-00172.safetensors", + 
"model.layers.39.mlp.experts.105.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.105.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.105.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.106.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.106.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.106.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.107.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.107.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.107.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.108.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.108.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.108.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.109.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.109.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.109.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.110.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.110.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.110.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.111.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.111.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.111.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.112.down_proj.weight": "model-00124-of-00172.safetensors", + 
"model.layers.39.mlp.experts.112.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.112.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.113.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.113.gate_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.113.up_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.114.down_proj.weight": "model-00124-of-00172.safetensors", + "model.layers.39.mlp.experts.114.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.114.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.115.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.115.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.115.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.116.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.116.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.116.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.117.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.117.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.117.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.118.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.118.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.118.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.119.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.119.gate_proj.weight": "model-00125-of-00172.safetensors", + 
"model.layers.39.mlp.experts.119.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.120.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.120.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.120.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.121.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.121.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.121.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.122.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.122.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.122.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.123.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.123.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.123.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.124.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.124.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.124.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.125.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.125.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.125.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.126.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.126.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.126.up_proj.weight": "model-00125-of-00172.safetensors", + 
"model.layers.39.mlp.experts.127.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.127.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.experts.127.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.gate.weight": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.gate.weight_1": "model-00125-of-00172.safetensors", + "model.layers.39.mlp.moe_statics.e_score_correction_bias": "model-00125-of-00172.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00125-of-00172.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.input_layernorm.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.0.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.0.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.1.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.1.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.2.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.2.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.3.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00125-of-00172.safetensors", + 
"model.layers.40.mlp.experts.3.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.4.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.4.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.5.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.5.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.6.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.6.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.7.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.7.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.8.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.8.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.9.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.9.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.10.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.10.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.11.down_proj.weight": 
"model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.11.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.12.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.12.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.13.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.13.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.14.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.14.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.15.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.15.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.16.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.16.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.17.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.17.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.18.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00125-of-00172.safetensors", + 
"model.layers.40.mlp.experts.18.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.19.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.19.up_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.20.down_proj.weight": "model-00125-of-00172.safetensors", + "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.20.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.21.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.21.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.22.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.22.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.23.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.23.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.24.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.24.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.25.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.25.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.26.down_proj.weight": 
"model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.26.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.27.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.27.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.28.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.28.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.29.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.29.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.30.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.30.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.31.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.31.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.32.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.32.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.33.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.33.gate_proj.weight": "model-00126-of-00172.safetensors", + 
"model.layers.40.mlp.experts.33.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.34.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.34.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.35.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.35.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.36.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.36.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.37.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.37.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.38.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.38.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.39.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.39.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.40.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.40.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.41.down_proj.weight": 
"model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.41.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.42.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.42.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.43.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.43.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.44.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.44.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.45.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.45.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.46.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.46.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.47.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.47.up_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.48.down_proj.weight": "model-00126-of-00172.safetensors", + "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00127-of-00172.safetensors", + 
"model.layers.40.mlp.experts.48.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.49.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.49.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.50.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.50.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.51.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.51.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.52.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.52.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.53.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.53.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.54.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.54.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.55.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.55.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.55.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.56.down_proj.weight": 
"model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.56.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.57.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.57.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.58.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.58.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.59.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.59.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.60.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.60.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.61.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.61.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.62.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.62.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.63.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00127-of-00172.safetensors", + 
"model.layers.40.mlp.experts.63.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.64.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.64.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.64.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.65.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.65.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.65.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.66.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.66.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.66.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.67.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.67.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.67.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.68.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.68.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.68.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.69.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.69.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.69.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.70.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.70.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.70.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.71.down_proj.weight": 
"model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.71.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.71.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.72.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.72.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.72.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.73.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.73.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.73.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.74.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.74.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.74.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.75.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.75.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.75.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.76.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.76.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.76.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.77.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.77.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.77.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.78.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.78.gate_proj.weight": "model-00127-of-00172.safetensors", + 
"model.layers.40.mlp.experts.78.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.79.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.79.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.79.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.80.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.80.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.80.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.81.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.81.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.81.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.82.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.82.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.82.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.83.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.83.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.83.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.84.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.84.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.84.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.85.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.85.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.85.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.86.down_proj.weight": 
"model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.86.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.86.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.87.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.87.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.87.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.88.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.88.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.88.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.89.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.89.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.89.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.90.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.90.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.90.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.91.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.91.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.91.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.92.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.92.gate_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.92.up_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.93.down_proj.weight": "model-00127-of-00172.safetensors", + "model.layers.40.mlp.experts.93.gate_proj.weight": "model-00128-of-00172.safetensors", + 
"model.layers.40.mlp.experts.93.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.94.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.94.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.94.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.95.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.95.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.95.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.96.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.96.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.96.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.97.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.97.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.97.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.98.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.98.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.98.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.99.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.99.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.99.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.100.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.100.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.100.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.101.down_proj.weight": 
"model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.101.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.101.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.102.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.102.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.102.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.103.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.103.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.103.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.104.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.104.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.104.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.105.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.105.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.105.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.106.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.106.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.106.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.107.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.107.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.107.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.108.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.108.gate_proj.weight": 
"model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.108.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.109.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.109.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.109.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.110.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.110.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.110.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.111.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.111.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.111.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.112.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.112.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.112.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.113.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.113.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.113.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.114.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.114.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.114.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.115.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.115.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.115.up_proj.weight": "model-00128-of-00172.safetensors", 
+ "model.layers.40.mlp.experts.116.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.116.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.116.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.117.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.117.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.117.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.118.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.118.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.118.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.119.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.119.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.119.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.120.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.120.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.120.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.121.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.121.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.121.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.122.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.122.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.122.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.123.down_proj.weight": "model-00128-of-00172.safetensors", + 
"model.layers.40.mlp.experts.123.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.123.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.124.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.124.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.124.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.125.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.125.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.125.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.126.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.126.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.126.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.127.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.127.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.experts.127.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.gate.weight": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.gate.weight_1": "model-00128-of-00172.safetensors", + "model.layers.40.mlp.moe_statics.e_score_correction_bias": "model-00128-of-00172.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00128-of-00172.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.input_layernorm.weight": "model-00128-of-00172.safetensors", + 
"model.layers.41.mlp.experts.0.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.0.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.1.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.1.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.2.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.2.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.3.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.3.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.4.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.4.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.5.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.5.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.6.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.6.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.7.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.7.gate_proj.weight": 
"model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.7.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.8.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.8.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.9.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.9.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.10.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.10.up_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.11.down_proj.weight": "model-00128-of-00172.safetensors", + "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.11.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.12.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.12.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.13.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.13.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.14.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.14.up_proj.weight": "model-00129-of-00172.safetensors", + 
"model.layers.41.mlp.experts.15.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.15.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.16.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.16.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.17.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.17.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.18.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.18.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.19.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.19.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.20.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.20.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.21.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.21.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.22.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.22.gate_proj.weight": 
"model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.22.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.23.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.23.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.24.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.24.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.25.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.25.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.26.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.26.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.27.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.27.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.28.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.28.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.29.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.29.up_proj.weight": "model-00129-of-00172.safetensors", + 
"model.layers.41.mlp.experts.30.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.30.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.31.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.31.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.32.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.32.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.33.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.33.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.34.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.34.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.35.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.35.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.36.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.36.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.37.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.37.gate_proj.weight": 
"model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.37.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.38.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.38.up_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.39.down_proj.weight": "model-00129-of-00172.safetensors", + "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.39.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.40.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.40.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.41.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.41.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.42.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.42.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.43.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.43.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.44.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.44.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.44.up_proj.weight": "model-00130-of-00172.safetensors", + 
"model.layers.41.mlp.experts.45.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.45.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.46.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.46.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.47.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.47.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.48.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.48.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.49.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.49.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.50.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.50.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.51.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.51.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.52.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.52.gate_proj.weight": 
"model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.52.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.53.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.53.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.54.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.54.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.55.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.55.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.56.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.56.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.57.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.57.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.58.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.58.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.59.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.59.up_proj.weight": "model-00130-of-00172.safetensors", + 
"model.layers.41.mlp.experts.60.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.60.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.61.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.61.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.62.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.62.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.63.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.63.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.64.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.64.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.64.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.65.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.65.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.65.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.66.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.66.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.66.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.67.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.67.gate_proj.weight": 
"model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.67.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.68.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.68.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.68.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.69.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.69.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.69.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.70.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.70.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.70.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.71.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.71.gate_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.71.up_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.72.down_proj.weight": "model-00130-of-00172.safetensors", + "model.layers.41.mlp.experts.72.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.72.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.73.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.73.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.73.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.74.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.74.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.74.up_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.75.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.75.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.75.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.76.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.76.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.76.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.77.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.77.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.77.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.78.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.78.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.78.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.79.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.79.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.79.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.80.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.80.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.80.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.81.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.81.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.81.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.82.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.82.gate_proj.weight": 
"model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.82.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.83.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.83.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.83.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.84.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.84.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.84.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.85.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.85.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.85.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.86.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.86.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.86.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.87.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.87.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.87.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.88.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.88.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.88.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.89.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.89.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.89.up_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.90.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.90.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.90.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.91.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.91.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.91.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.92.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.92.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.92.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.93.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.93.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.93.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.94.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.94.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.94.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.95.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.95.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.95.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.96.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.96.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.96.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.97.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.97.gate_proj.weight": 
"model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.97.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.98.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.98.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.98.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.99.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.99.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.99.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.100.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.100.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.100.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.101.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.101.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.101.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.102.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.102.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.102.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.103.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.103.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.103.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.104.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.104.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.104.up_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.105.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.105.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.105.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.106.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.106.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.106.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.107.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.107.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.107.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.108.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.108.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.108.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.109.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.109.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.109.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.110.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.110.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.110.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.111.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.111.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.111.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.112.down_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.112.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.112.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.113.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.113.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.113.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.114.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.114.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.114.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.115.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.115.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.115.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.116.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.116.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.116.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.117.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.117.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.117.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.118.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.118.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.118.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.119.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.119.gate_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.119.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.120.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.120.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.120.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.121.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.121.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.121.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.122.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.122.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.122.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.123.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.123.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.123.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.124.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.124.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.124.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.125.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.125.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.125.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.126.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.126.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.126.up_proj.weight": "model-00131-of-00172.safetensors", + 
"model.layers.41.mlp.experts.127.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.127.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.experts.127.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.gate.weight": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.gate.weight_1": "model-00131-of-00172.safetensors", + "model.layers.41.mlp.moe_statics.e_score_correction_bias": "model-00131-of-00172.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00131-of-00172.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.input_layernorm.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.0.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.0.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.1.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.1.up_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.2.down_proj.weight": "model-00131-of-00172.safetensors", + "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.2.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.3.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00132-of-00172.safetensors", + 
"model.layers.42.mlp.experts.3.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.4.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.4.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.5.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.5.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.6.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.6.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.7.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.7.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.8.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.8.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.9.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.9.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.10.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.10.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.11.down_proj.weight": 
"model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.11.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.12.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.12.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.13.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.13.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.14.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.14.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.15.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.15.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.16.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.16.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.17.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.17.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.18.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00132-of-00172.safetensors", + 
"model.layers.42.mlp.experts.18.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.19.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.19.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.20.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.20.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.21.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.21.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.22.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.22.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.23.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.23.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.24.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.24.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.25.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.25.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.26.down_proj.weight": 
"model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.26.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.27.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.27.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.28.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.28.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.29.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.29.up_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.30.down_proj.weight": "model-00132-of-00172.safetensors", + "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.30.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.31.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.31.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.32.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.32.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.33.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.33.gate_proj.weight": "model-00133-of-00172.safetensors", + 
"model.layers.42.mlp.experts.33.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.34.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.34.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.35.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.35.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.36.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.36.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.37.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.37.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.38.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.38.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.39.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.39.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.40.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.40.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.41.down_proj.weight": 
"model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.41.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.42.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.42.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.43.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.43.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.44.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.44.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.45.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.45.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.46.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.46.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.47.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.47.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.48.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00133-of-00172.safetensors", + 
"model.layers.42.mlp.experts.48.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.49.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.49.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.50.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.50.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.51.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.51.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.52.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.52.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.53.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.53.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.54.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.54.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.55.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.55.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.55.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.56.down_proj.weight": 
"model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.56.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.57.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.57.up_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.58.down_proj.weight": "model-00133-of-00172.safetensors", + "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.58.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.59.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.59.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.60.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.60.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.61.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.61.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.62.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.62.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.63.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00134-of-00172.safetensors", + 
"model.layers.42.mlp.experts.63.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.64.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.64.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.64.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.65.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.65.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.65.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.66.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.66.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.66.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.67.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.67.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.67.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.68.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.68.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.68.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.69.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.69.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.69.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.70.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.70.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.70.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.71.down_proj.weight": 
"model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.71.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.71.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.72.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.72.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.72.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.73.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.73.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.73.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.74.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.74.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.74.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.75.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.75.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.75.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.76.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.76.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.76.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.77.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.77.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.77.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.78.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.78.gate_proj.weight": "model-00134-of-00172.safetensors", + 
"model.layers.42.mlp.experts.78.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.79.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.79.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.79.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.80.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.80.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.80.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.81.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.81.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.81.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.82.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.82.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.82.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.83.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.83.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.83.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.84.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.84.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.84.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.85.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.85.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.85.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.86.down_proj.weight": 
"model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.86.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.86.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.87.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.87.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.87.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.88.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.88.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.88.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.89.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.89.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.89.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.90.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.90.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.90.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.91.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.91.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.91.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.92.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.92.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.92.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.93.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.93.gate_proj.weight": "model-00134-of-00172.safetensors", + 
"model.layers.42.mlp.experts.93.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.94.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.94.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.94.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.95.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.95.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.95.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.96.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.96.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.96.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.97.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.97.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.97.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.98.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.98.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.98.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.99.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.99.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.99.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.100.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.100.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.100.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.101.down_proj.weight": 
"model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.101.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.101.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.102.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.102.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.102.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.103.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.103.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.103.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.104.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.104.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.104.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.105.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.105.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.105.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.106.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.106.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.106.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.107.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.107.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.107.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.108.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.108.gate_proj.weight": 
"model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.108.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.109.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.109.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.109.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.110.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.110.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.110.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.111.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.111.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.111.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.112.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.112.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.112.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.113.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.113.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.113.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.114.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.114.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.114.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.115.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.115.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.115.up_proj.weight": "model-00134-of-00172.safetensors", 
+ "model.layers.42.mlp.experts.116.down_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.116.gate_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.116.up_proj.weight": "model-00134-of-00172.safetensors", + "model.layers.42.mlp.experts.117.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.117.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.117.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.118.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.118.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.118.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.119.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.119.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.119.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.120.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.120.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.120.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.121.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.121.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.121.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.122.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.122.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.122.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.123.down_proj.weight": "model-00135-of-00172.safetensors", + 
"model.layers.42.mlp.experts.123.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.123.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.124.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.124.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.124.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.125.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.125.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.125.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.126.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.126.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.126.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.127.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.127.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.experts.127.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.gate.weight": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.gate.weight_1": "model-00135-of-00172.safetensors", + "model.layers.42.mlp.moe_statics.e_score_correction_bias": "model-00135-of-00172.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00135-of-00172.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.input_layernorm.weight": "model-00135-of-00172.safetensors", + 
"model.layers.43.mlp.experts.0.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.0.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.1.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.1.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.2.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.2.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.3.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.3.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.4.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.4.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.5.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.5.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.6.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.6.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.7.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.7.gate_proj.weight": 
"model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.7.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.8.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.8.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.9.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.9.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.10.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.10.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.11.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.11.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.12.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.12.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.13.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.13.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.14.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.14.up_proj.weight": "model-00135-of-00172.safetensors", + 
"model.layers.43.mlp.experts.15.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.15.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.16.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.16.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.17.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.17.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.18.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.18.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.19.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.19.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.20.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.20.up_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.21.down_proj.weight": "model-00135-of-00172.safetensors", + "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.21.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.22.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.22.gate_proj.weight": 
"model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.22.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.23.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.23.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.24.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.24.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.25.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.25.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.26.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.26.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.27.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.27.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.28.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.28.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.29.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.29.up_proj.weight": "model-00136-of-00172.safetensors", + 
"model.layers.43.mlp.experts.30.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.30.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.31.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.31.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.32.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.32.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.33.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.33.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.34.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.34.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.35.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.35.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.36.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.36.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.37.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.37.gate_proj.weight": 
"model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.37.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.38.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.38.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.39.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.39.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.40.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.40.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.41.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.41.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.42.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.42.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.43.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.43.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.44.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.44.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.44.up_proj.weight": "model-00136-of-00172.safetensors", + 
"model.layers.43.mlp.experts.45.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.45.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.46.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.46.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.47.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.47.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.48.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.48.up_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.49.down_proj.weight": "model-00136-of-00172.safetensors", + "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.49.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.50.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.50.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.51.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.51.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.52.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.52.gate_proj.weight": 
"model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.52.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.53.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.53.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.54.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.54.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.55.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.55.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.56.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.56.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.57.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.57.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.58.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.58.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.59.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.59.up_proj.weight": "model-00137-of-00172.safetensors", + 
"model.layers.43.mlp.experts.60.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.60.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.61.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.61.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.62.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.62.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.63.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.63.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.64.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.64.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.64.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.65.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.65.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.65.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.66.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.66.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.66.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.67.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.67.gate_proj.weight": 
"model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.67.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.68.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.68.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.68.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.69.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.69.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.69.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.70.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.70.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.70.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.71.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.71.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.71.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.72.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.72.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.72.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.73.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.73.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.73.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.74.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.74.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.74.up_proj.weight": "model-00137-of-00172.safetensors", + 
"model.layers.43.mlp.experts.75.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.75.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.76.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.76.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.77.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.77.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.77.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.78.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.78.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.78.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.79.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.79.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.79.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.80.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.80.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.80.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.81.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.81.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.81.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.82.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.82.gate_proj.weight": 
"model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.82.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.83.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.83.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.83.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.84.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.84.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.84.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.85.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.85.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.85.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.86.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.86.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.86.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.87.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.87.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.87.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.88.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.88.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.88.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.89.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.89.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.89.up_proj.weight": "model-00137-of-00172.safetensors", + 
"model.layers.43.mlp.experts.90.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.90.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.90.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.91.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.91.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.91.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.92.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.92.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.92.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.93.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.93.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.93.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.94.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.94.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.94.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.95.down_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.95.gate_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.95.up_proj.weight": "model-00137-of-00172.safetensors", + "model.layers.43.mlp.experts.96.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.96.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.96.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.97.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.97.gate_proj.weight": 
"model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.97.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.98.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.98.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.98.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.99.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.99.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.99.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.100.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.100.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.100.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.101.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.101.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.101.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.102.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.102.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.102.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.103.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.103.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.103.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.104.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.104.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.104.up_proj.weight": "model-00138-of-00172.safetensors", + 
"model.layers.43.mlp.experts.105.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.105.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.105.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.106.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.106.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.106.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.107.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.107.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.107.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.108.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.108.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.108.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.109.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.109.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.109.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.110.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.110.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.110.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.111.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.111.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.111.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.112.down_proj.weight": "model-00138-of-00172.safetensors", + 
"model.layers.43.mlp.experts.112.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.112.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.113.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.113.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.113.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.114.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.114.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.114.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.115.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.115.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.115.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.116.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.116.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.116.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.117.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.117.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.117.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.118.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.118.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.118.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.119.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.119.gate_proj.weight": "model-00138-of-00172.safetensors", + 
"model.layers.43.mlp.experts.119.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.120.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.120.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.120.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.121.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.121.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.121.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.122.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.122.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.122.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.123.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.123.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.123.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.124.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.124.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.124.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.125.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.125.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.125.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.126.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.126.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.126.up_proj.weight": "model-00138-of-00172.safetensors", + 
"model.layers.43.mlp.experts.127.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.127.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.experts.127.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.gate.weight": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.gate.weight_1": "model-00138-of-00172.safetensors", + "model.layers.43.mlp.moe_statics.e_score_correction_bias": "model-00138-of-00172.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00138-of-00172.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.input_layernorm.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.0.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.0.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.1.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.1.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.2.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.2.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.3.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00138-of-00172.safetensors", + 
"model.layers.44.mlp.experts.3.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.4.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.4.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.5.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.5.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.6.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.6.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.7.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.7.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.8.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.8.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.9.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.9.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.10.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.10.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.11.down_proj.weight": 
"model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.11.up_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.12.down_proj.weight": "model-00138-of-00172.safetensors", + "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.12.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.13.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.13.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.14.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.14.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.15.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.15.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.16.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.16.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.17.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.17.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.18.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00139-of-00172.safetensors", + 
"model.layers.44.mlp.experts.18.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.19.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.19.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.20.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.20.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.21.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.21.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.22.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.22.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.23.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.23.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.24.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.24.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.25.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.25.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.26.down_proj.weight": 
"model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.26.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.27.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.27.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.28.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.28.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.29.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.29.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.30.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.30.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.31.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.31.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.32.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.32.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.33.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.33.gate_proj.weight": "model-00139-of-00172.safetensors", + 
"model.layers.44.mlp.experts.33.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.34.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.34.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.35.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.35.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.36.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.36.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.37.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.37.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.38.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.38.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.39.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.39.up_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.40.down_proj.weight": "model-00139-of-00172.safetensors", + "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.40.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.41.down_proj.weight": 
"model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.41.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.42.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.42.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.43.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.43.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.44.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.44.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.45.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.45.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.46.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.46.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.47.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.47.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.48.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00140-of-00172.safetensors", + 
"model.layers.44.mlp.experts.48.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.49.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.49.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.50.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.50.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.51.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.51.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.52.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.52.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.53.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.53.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.54.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.54.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.55.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.55.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.55.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.56.down_proj.weight": 
"model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.56.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.57.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.57.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.58.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.58.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.59.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.59.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.60.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.60.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.61.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.61.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.62.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.62.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.63.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00140-of-00172.safetensors", + 
"model.layers.44.mlp.experts.63.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.64.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.64.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.64.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.65.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.65.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.65.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.66.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.66.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.66.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.67.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.67.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.67.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.68.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.68.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.68.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.69.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.69.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.69.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.70.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.70.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.70.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.71.down_proj.weight": 
"model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.71.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.71.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.72.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.72.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.72.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.73.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.73.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.73.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.74.down_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.74.gate_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.74.up_proj.weight": "model-00140-of-00172.safetensors", + "model.layers.44.mlp.experts.75.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.75.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.75.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.76.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.76.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.76.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.77.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.77.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.77.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.78.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.78.gate_proj.weight": "model-00141-of-00172.safetensors", + 
"model.layers.44.mlp.experts.78.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.79.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.79.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.79.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.80.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.80.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.80.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.81.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.81.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.81.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.82.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.82.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.82.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.83.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.83.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.83.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.84.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.84.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.84.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.85.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.85.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.85.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.86.down_proj.weight": 
"model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.86.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.86.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.87.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.87.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.87.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.88.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.88.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.88.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.89.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.89.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.89.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.90.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.90.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.90.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.91.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.91.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.91.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.92.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.92.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.92.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.93.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.93.gate_proj.weight": "model-00141-of-00172.safetensors", + 
"model.layers.44.mlp.experts.93.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.94.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.94.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.94.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.95.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.95.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.95.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.96.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.96.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.96.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.97.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.97.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.97.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.98.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.98.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.98.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.99.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.99.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.99.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.100.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.100.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.100.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.101.down_proj.weight": 
"model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.101.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.101.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.102.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.102.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.102.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.103.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.103.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.103.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.104.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.104.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.104.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.105.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.105.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.105.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.106.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.106.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.106.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.107.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.107.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.107.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.108.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.108.gate_proj.weight": 
"model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.108.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.109.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.109.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.109.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.110.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.110.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.110.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.111.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.111.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.111.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.112.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.112.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.112.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.113.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.113.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.113.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.114.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.114.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.114.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.115.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.115.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.115.up_proj.weight": "model-00141-of-00172.safetensors", 
+ "model.layers.44.mlp.experts.116.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.116.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.116.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.117.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.117.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.117.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.118.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.118.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.118.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.119.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.119.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.119.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.120.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.120.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.120.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.121.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.121.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.121.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.122.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.122.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.122.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.123.down_proj.weight": "model-00141-of-00172.safetensors", + 
"model.layers.44.mlp.experts.123.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.123.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.124.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.124.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.124.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.125.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.125.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.125.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.126.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.126.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.126.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.127.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.127.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.experts.127.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.gate.weight": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.gate.weight_1": "model-00141-of-00172.safetensors", + "model.layers.44.mlp.moe_statics.e_score_correction_bias": "model-00141-of-00172.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00141-of-00172.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.input_layernorm.weight": "model-00141-of-00172.safetensors", + 
"model.layers.45.mlp.experts.0.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.0.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.1.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.1.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.2.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.2.up_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.3.down_proj.weight": "model-00141-of-00172.safetensors", + "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.3.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.4.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.4.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.5.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.5.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.6.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.6.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.7.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.7.gate_proj.weight": 
"model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.7.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.8.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.8.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.9.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.9.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.10.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.10.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.11.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.11.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.12.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.12.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.13.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.13.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.14.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.14.up_proj.weight": "model-00142-of-00172.safetensors", + 
"model.layers.45.mlp.experts.15.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.15.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.16.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.16.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.17.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.17.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.18.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.18.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.19.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.19.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.20.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.20.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.21.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.21.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.22.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.22.gate_proj.weight": 
"model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.22.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.23.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.23.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.24.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.24.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.25.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.25.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.26.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.26.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.27.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.27.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.28.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.28.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.29.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.29.up_proj.weight": "model-00142-of-00172.safetensors", + 
"model.layers.45.mlp.experts.30.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.30.up_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.31.down_proj.weight": "model-00142-of-00172.safetensors", + "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.31.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.32.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.32.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.33.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.33.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.34.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.34.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.35.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.35.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.36.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.36.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.37.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.37.gate_proj.weight": 
"model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.37.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.38.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.38.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.39.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.39.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.40.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.40.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.41.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.41.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.42.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.42.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.43.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.43.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.44.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.44.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.44.up_proj.weight": "model-00143-of-00172.safetensors", + 
"model.layers.45.mlp.experts.45.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.45.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.46.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.46.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.47.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.47.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.48.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.48.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.49.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.49.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.50.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.50.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.51.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.51.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.52.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.52.gate_proj.weight": 
"model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.52.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.53.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.53.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.54.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.54.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.55.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.55.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.56.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.56.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.57.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.57.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.58.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.58.up_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.59.down_proj.weight": "model-00143-of-00172.safetensors", + "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.59.up_proj.weight": "model-00144-of-00172.safetensors", + 
"model.layers.45.mlp.experts.60.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.60.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.61.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.61.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.62.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.62.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.63.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.63.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.64.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.64.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.64.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.65.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.65.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.65.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.66.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.66.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.66.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.67.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.67.gate_proj.weight": 
"model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.67.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.68.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.68.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.68.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.69.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.69.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.69.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.70.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.70.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.70.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.71.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.71.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.71.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.72.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.72.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.72.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.73.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.73.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.73.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.74.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.74.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.74.up_proj.weight": "model-00144-of-00172.safetensors", + 
"model.layers.45.mlp.experts.75.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.75.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.75.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.76.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.76.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.76.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.77.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.77.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.77.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.78.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.78.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.78.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.79.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.79.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.79.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.80.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.80.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.80.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.81.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.81.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.81.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.82.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.82.gate_proj.weight": 
"model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.82.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.83.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.83.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.83.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.84.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.84.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.84.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.85.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.85.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.85.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.86.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.86.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.86.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.87.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.87.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.87.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.88.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.88.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.88.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.89.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.89.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.89.up_proj.weight": "model-00144-of-00172.safetensors", + 
"model.layers.45.mlp.experts.90.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.90.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.90.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.91.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.91.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.91.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.92.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.92.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.92.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.93.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.93.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.93.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.94.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.94.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.94.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.95.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.95.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.95.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.96.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.96.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.96.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.97.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.97.gate_proj.weight": 
"model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.97.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.98.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.98.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.98.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.99.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.99.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.99.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.100.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.100.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.100.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.101.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.101.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.101.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.102.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.102.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.102.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.103.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.103.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.103.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.104.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.104.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.104.up_proj.weight": "model-00144-of-00172.safetensors", + 
"model.layers.45.mlp.experts.105.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.105.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.105.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.106.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.106.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.106.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.107.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.107.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.107.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.108.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.108.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.108.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.109.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.109.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.109.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.110.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.110.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.110.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.111.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.111.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.111.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.112.down_proj.weight": "model-00144-of-00172.safetensors", + 
"model.layers.45.mlp.experts.112.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.112.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.113.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.113.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.113.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.114.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.114.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.114.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.115.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.115.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.115.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.116.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.116.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.116.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.117.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.117.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.117.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.118.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.118.gate_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.118.up_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.119.down_proj.weight": "model-00144-of-00172.safetensors", + "model.layers.45.mlp.experts.119.gate_proj.weight": "model-00145-of-00172.safetensors", + 
"model.layers.45.mlp.experts.119.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.120.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.120.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.120.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.121.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.121.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.121.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.122.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.122.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.122.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.123.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.123.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.123.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.124.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.124.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.124.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.125.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.125.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.125.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.126.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.126.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.126.up_proj.weight": "model-00145-of-00172.safetensors", + 
"model.layers.45.mlp.experts.127.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.127.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.experts.127.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.gate.weight": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.gate.weight_1": "model-00145-of-00172.safetensors", + "model.layers.45.mlp.moe_statics.e_score_correction_bias": "model-00145-of-00172.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00145-of-00172.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.input_layernorm.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.0.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.0.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.1.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.1.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.2.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.2.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.3.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00145-of-00172.safetensors", + 
"model.layers.46.mlp.experts.3.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.4.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.4.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.5.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.5.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.6.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.6.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.7.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.7.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.8.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.8.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.9.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.9.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.10.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.10.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.11.down_proj.weight": 
"model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.11.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.12.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.12.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.13.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.13.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.14.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.14.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.15.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.15.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.16.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.16.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.17.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.17.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.18.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00145-of-00172.safetensors", + 
"model.layers.46.mlp.experts.18.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.19.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.19.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.20.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.20.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.21.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.21.up_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.22.down_proj.weight": "model-00145-of-00172.safetensors", + "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.22.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.23.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.23.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.24.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.24.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.25.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.25.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.26.down_proj.weight": 
"model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.26.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.27.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.27.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.28.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.28.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.29.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.29.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.30.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.30.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.31.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.31.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.32.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.32.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.33.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00146-of-00172.safetensors", + 
"model.layers.46.mlp.experts.33.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.34.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.34.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.35.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.35.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.36.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.36.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.37.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.37.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.38.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.38.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.39.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.39.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.40.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.40.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.41.down_proj.weight": 
"model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.41.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.42.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.42.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.43.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.43.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.44.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.44.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.45.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.45.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.46.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.46.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.47.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.47.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.48.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00146-of-00172.safetensors", + 
"model.layers.46.mlp.experts.48.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.49.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.49.up_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.50.down_proj.weight": "model-00146-of-00172.safetensors", + "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.50.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.51.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.51.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.52.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.52.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.53.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.53.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.54.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.54.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.55.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.55.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.56.down_proj.weight": 
"model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.56.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.57.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.57.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.58.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.58.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.59.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.59.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.60.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.60.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.61.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.61.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.62.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.62.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.63.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00147-of-00172.safetensors", + 
"model.layers.46.mlp.experts.63.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.64.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.64.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.64.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.65.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.65.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.65.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.66.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.66.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.66.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.67.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.67.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.67.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.68.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.68.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.68.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.69.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.69.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.69.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.70.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.70.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.70.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.71.down_proj.weight": 
"model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.71.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.71.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.72.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.72.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.72.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.73.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.73.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.73.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.74.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.74.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.74.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.75.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.75.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.75.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.76.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.76.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.76.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.77.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.77.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.77.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.78.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.78.gate_proj.weight": "model-00147-of-00172.safetensors", + 
"model.layers.46.mlp.experts.78.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.79.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.79.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.79.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.80.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.80.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.80.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.81.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.81.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.81.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.82.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.82.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.82.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.83.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.83.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.83.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.84.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.84.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.84.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.85.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.85.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.85.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.86.down_proj.weight": 
"model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.86.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.86.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.87.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.87.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.87.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.88.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.88.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.88.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.89.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.89.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.89.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.90.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.90.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.90.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.91.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.91.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.91.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.92.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.92.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.92.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.93.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.93.gate_proj.weight": "model-00147-of-00172.safetensors", + 
"model.layers.46.mlp.experts.93.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.94.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.94.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.94.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.95.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.95.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.95.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.96.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.96.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.96.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.97.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.97.gate_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.97.up_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.98.down_proj.weight": "model-00147-of-00172.safetensors", + "model.layers.46.mlp.experts.98.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.98.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.99.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.99.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.99.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.100.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.100.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.100.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.101.down_proj.weight": 
"model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.101.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.101.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.102.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.102.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.102.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.103.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.103.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.103.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.104.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.104.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.104.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.105.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.105.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.105.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.106.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.106.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.106.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.107.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.107.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.107.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.108.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.108.gate_proj.weight": 
"model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.108.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.109.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.109.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.109.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.110.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.110.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.110.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.111.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.111.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.111.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.112.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.112.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.112.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.113.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.113.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.113.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.114.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.114.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.114.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.115.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.115.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.115.up_proj.weight": "model-00148-of-00172.safetensors", 
+ "model.layers.46.mlp.experts.116.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.116.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.116.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.117.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.117.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.117.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.118.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.118.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.118.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.119.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.119.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.119.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.120.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.120.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.120.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.121.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.121.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.121.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.122.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.122.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.122.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.123.down_proj.weight": "model-00148-of-00172.safetensors", + 
"model.layers.46.mlp.experts.123.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.123.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.124.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.124.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.124.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.125.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.125.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.125.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.126.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.126.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.126.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.127.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.127.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.experts.127.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.gate.weight": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.gate.weight_1": "model-00148-of-00172.safetensors", + "model.layers.46.mlp.moe_statics.e_score_correction_bias": "model-00148-of-00172.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00148-of-00172.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.input_layernorm.weight": "model-00148-of-00172.safetensors", + 
"model.layers.47.mlp.experts.0.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.0.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.1.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.1.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.2.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.2.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.3.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.3.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.4.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.4.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.5.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.5.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.6.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.6.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.7.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.7.gate_proj.weight": 
"model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.7.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.8.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.8.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.9.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.9.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.10.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.10.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.11.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.11.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.12.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.12.up_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.13.down_proj.weight": "model-00148-of-00172.safetensors", + "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.13.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.14.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.14.up_proj.weight": "model-00149-of-00172.safetensors", + 
"model.layers.47.mlp.experts.15.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.15.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.16.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.16.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.17.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.17.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.18.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.18.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.19.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.19.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.20.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.20.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.21.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.21.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.22.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.22.gate_proj.weight": 
"model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.22.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.23.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.23.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.24.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.24.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.25.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.25.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.26.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.26.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.27.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.27.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.28.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.28.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.29.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.29.up_proj.weight": "model-00149-of-00172.safetensors", + 
"model.layers.47.mlp.experts.30.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.30.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.31.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.31.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.32.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.32.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.33.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.33.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.34.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.34.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.35.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.35.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.36.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.36.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.37.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.37.gate_proj.weight": 
"model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.37.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.38.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.38.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.39.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.39.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.40.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.40.up_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.41.down_proj.weight": "model-00149-of-00172.safetensors", + "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.41.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.42.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.42.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.43.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.43.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.44.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.44.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.44.up_proj.weight": "model-00150-of-00172.safetensors", + 
"model.layers.47.mlp.experts.45.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.45.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.46.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.46.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.47.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.47.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.48.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.48.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.49.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.49.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.50.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.50.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.51.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.51.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.52.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.52.gate_proj.weight": 
"model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.52.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.53.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.53.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.54.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.54.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.55.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.55.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.56.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.56.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.57.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.57.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.58.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.58.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.59.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.59.up_proj.weight": "model-00150-of-00172.safetensors", + 
"model.layers.47.mlp.experts.60.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.60.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.61.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.61.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.62.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.62.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.63.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.63.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.64.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.64.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.64.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.65.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.65.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.65.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.66.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.66.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.66.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.67.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.67.gate_proj.weight": 
"model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.67.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.68.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.68.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.68.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.69.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.69.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.69.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.70.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.70.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.70.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.71.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.71.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.71.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.72.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.72.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.72.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.73.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.73.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.73.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.74.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.74.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.74.up_proj.weight": "model-00150-of-00172.safetensors", + 
"model.layers.47.mlp.experts.75.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.75.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.75.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.76.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.76.gate_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.76.up_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.77.down_proj.weight": "model-00150-of-00172.safetensors", + "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.77.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.78.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.78.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.79.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.79.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.80.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.80.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.80.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.81.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.81.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.81.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.82.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.82.gate_proj.weight": 
"model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.82.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.83.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.83.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.83.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.84.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.84.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.84.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.85.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.85.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.85.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.86.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.86.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.86.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.87.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.87.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.87.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.88.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.88.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.88.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.89.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.89.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.89.up_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.47.mlp.experts.90.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.90.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.90.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.91.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.91.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.91.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.92.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.92.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.92.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.93.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.93.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.93.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.94.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.94.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.94.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.95.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.95.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.95.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.96.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.96.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.96.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.97.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.97.gate_proj.weight": 
"model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.97.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.98.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.98.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.98.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.99.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.99.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.99.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.100.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.100.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.100.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.101.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.101.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.101.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.102.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.102.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.102.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.103.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.103.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.103.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.104.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.104.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.104.up_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.47.mlp.experts.105.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.105.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.105.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.106.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.106.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.106.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.107.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.107.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.107.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.108.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.108.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.108.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.109.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.109.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.109.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.110.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.110.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.110.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.111.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.111.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.111.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.112.down_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.47.mlp.experts.112.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.112.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.113.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.113.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.113.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.114.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.114.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.114.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.115.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.115.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.115.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.116.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.116.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.116.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.117.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.117.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.117.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.118.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.118.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.118.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.119.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.119.gate_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.47.mlp.experts.119.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.120.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.120.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.120.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.121.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.121.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.121.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.122.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.122.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.122.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.123.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.123.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.123.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.124.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.124.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.124.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.125.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.125.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.125.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.126.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.126.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.126.up_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.47.mlp.experts.127.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.127.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.experts.127.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.gate.weight": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.gate.weight_1": "model-00151-of-00172.safetensors", + "model.layers.47.mlp.moe_statics.e_score_correction_bias": "model-00151-of-00172.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00151-of-00172.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.input_layernorm.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.0.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.0.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.0.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.1.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.1.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.1.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.2.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.2.gate_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.2.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.3.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.3.gate_proj.weight": "model-00151-of-00172.safetensors", + 
"model.layers.48.mlp.experts.3.up_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.4.down_proj.weight": "model-00151-of-00172.safetensors", + "model.layers.48.mlp.experts.4.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.4.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.5.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.5.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.5.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.6.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.6.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.6.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.7.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.7.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.7.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.8.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.8.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.8.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.9.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.9.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.9.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.10.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.10.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.10.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.11.down_proj.weight": 
"model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.11.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.11.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.12.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.12.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.12.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.13.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.13.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.13.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.14.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.14.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.14.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.15.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.15.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.15.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.16.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.16.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.16.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.17.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.17.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.17.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.18.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.18.gate_proj.weight": "model-00152-of-00172.safetensors", + 
"model.layers.48.mlp.experts.18.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.19.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.19.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.19.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.20.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.20.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.20.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.21.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.21.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.21.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.22.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.22.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.22.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.23.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.23.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.23.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.24.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.24.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.24.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.25.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.25.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.25.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.26.down_proj.weight": 
"model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.26.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.26.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.27.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.27.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.27.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.28.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.28.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.28.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.29.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.29.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.29.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.30.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.30.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.30.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.31.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.31.gate_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.31.up_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.32.down_proj.weight": "model-00152-of-00172.safetensors", + "model.layers.48.mlp.experts.32.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.32.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.33.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.33.gate_proj.weight": "model-00153-of-00172.safetensors", + 
"model.layers.48.mlp.experts.33.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.34.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.34.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.34.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.35.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.35.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.35.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.36.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.36.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.36.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.37.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.37.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.37.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.38.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.38.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.38.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.39.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.39.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.39.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.40.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.40.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.40.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.41.down_proj.weight": 
"model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.41.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.41.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.42.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.42.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.42.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.43.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.43.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.43.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.44.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.44.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.44.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.45.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.45.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.45.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.46.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.46.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.46.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.47.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.47.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.47.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.48.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.48.gate_proj.weight": "model-00153-of-00172.safetensors", + 
"model.layers.48.mlp.experts.48.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.49.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.49.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.49.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.50.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.50.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.50.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.51.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.51.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.51.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.52.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.52.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.52.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.53.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.53.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.53.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.54.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.54.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.54.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.55.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.55.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.55.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.56.down_proj.weight": 
"model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.56.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.56.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.57.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.57.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.57.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.58.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.58.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.58.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.59.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.59.gate_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.59.up_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.60.down_proj.weight": "model-00153-of-00172.safetensors", + "model.layers.48.mlp.experts.60.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.60.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.61.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.61.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.61.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.62.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.62.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.62.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.63.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.63.gate_proj.weight": "model-00154-of-00172.safetensors", + 
"model.layers.48.mlp.experts.63.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.64.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.64.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.64.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.65.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.65.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.65.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.66.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.66.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.66.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.67.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.67.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.67.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.68.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.68.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.68.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.69.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.69.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.69.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.70.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.70.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.70.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.71.down_proj.weight": 
"model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.71.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.71.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.72.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.72.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.72.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.73.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.73.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.73.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.74.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.74.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.74.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.75.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.75.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.75.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.76.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.76.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.76.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.77.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.77.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.77.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.78.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.78.gate_proj.weight": "model-00154-of-00172.safetensors", + 
"model.layers.48.mlp.experts.78.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.79.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.79.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.79.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.80.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.80.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.80.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.81.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.81.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.81.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.82.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.82.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.82.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.83.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.83.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.83.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.84.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.84.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.84.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.85.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.85.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.85.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.86.down_proj.weight": 
"model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.86.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.86.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.87.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.87.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.87.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.88.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.88.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.88.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.89.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.89.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.89.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.90.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.90.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.90.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.91.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.91.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.91.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.92.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.92.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.92.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.93.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.93.gate_proj.weight": "model-00154-of-00172.safetensors", + 
"model.layers.48.mlp.experts.93.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.94.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.94.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.94.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.95.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.95.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.95.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.96.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.96.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.96.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.97.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.97.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.97.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.98.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.98.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.98.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.99.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.99.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.99.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.100.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.100.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.100.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.101.down_proj.weight": 
"model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.101.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.101.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.102.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.102.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.102.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.103.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.103.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.103.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.104.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.104.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.104.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.105.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.105.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.105.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.106.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.106.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.106.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.107.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.107.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.107.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.108.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.108.gate_proj.weight": 
"model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.108.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.109.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.109.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.109.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.110.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.110.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.110.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.111.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.111.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.111.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.112.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.112.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.112.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.113.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.113.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.113.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.114.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.114.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.114.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.115.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.115.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.115.up_proj.weight": "model-00154-of-00172.safetensors", 
+ "model.layers.48.mlp.experts.116.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.116.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.116.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.117.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.117.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.117.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.118.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.118.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.118.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.119.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.119.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.119.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.120.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.120.gate_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.120.up_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.121.down_proj.weight": "model-00154-of-00172.safetensors", + "model.layers.48.mlp.experts.121.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.121.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.122.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.122.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.122.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.123.down_proj.weight": "model-00155-of-00172.safetensors", + 
"model.layers.48.mlp.experts.123.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.123.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.124.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.124.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.124.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.125.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.125.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.125.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.126.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.126.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.126.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.127.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.127.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.experts.127.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.gate.weight": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.gate.weight_1": "model-00155-of-00172.safetensors", + "model.layers.48.mlp.moe_statics.e_score_correction_bias": "model-00155-of-00172.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00155-of-00172.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.input_layernorm.weight": "model-00155-of-00172.safetensors", + 
"model.layers.49.mlp.experts.0.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.0.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.0.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.1.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.1.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.1.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.2.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.2.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.2.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.3.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.3.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.3.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.4.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.4.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.4.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.5.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.5.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.5.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.6.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.6.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.6.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.7.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.7.gate_proj.weight": 
"model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.7.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.8.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.8.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.8.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.9.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.9.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.9.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.10.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.10.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.10.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.11.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.11.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.11.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.12.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.12.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.12.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.13.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.13.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.13.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.14.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.14.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.14.up_proj.weight": "model-00155-of-00172.safetensors", + 
"model.layers.49.mlp.experts.15.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.15.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.15.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.16.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.16.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.16.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.17.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.17.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.17.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.18.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.18.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.18.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.19.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.19.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.19.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.20.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.20.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.20.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.21.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.21.gate_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.21.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.22.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.22.gate_proj.weight": 
"model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.22.up_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.23.down_proj.weight": "model-00155-of-00172.safetensors", + "model.layers.49.mlp.experts.23.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.23.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.24.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.24.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.24.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.25.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.25.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.25.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.26.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.26.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.26.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.27.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.27.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.27.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.28.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.28.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.28.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.29.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.29.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.29.up_proj.weight": "model-00156-of-00172.safetensors", + 
"model.layers.49.mlp.experts.30.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.30.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.30.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.31.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.31.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.31.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.32.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.32.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.32.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.33.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.33.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.33.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.34.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.34.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.34.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.35.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.35.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.35.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.36.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.36.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.36.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.37.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.37.gate_proj.weight": 
"model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.37.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.38.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.38.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.38.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.39.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.39.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.39.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.40.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.40.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.40.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.41.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.41.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.41.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.42.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.42.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.42.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.43.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.43.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.43.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.44.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.44.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.44.up_proj.weight": "model-00156-of-00172.safetensors", + 
"model.layers.49.mlp.experts.45.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.45.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.45.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.46.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.46.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.46.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.47.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.47.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.47.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.48.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.48.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.48.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.49.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.49.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.49.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.50.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.50.gate_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.50.up_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.51.down_proj.weight": "model-00156-of-00172.safetensors", + "model.layers.49.mlp.experts.51.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.51.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.52.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.52.gate_proj.weight": 
"model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.52.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.53.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.53.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.53.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.54.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.54.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.54.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.55.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.55.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.55.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.56.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.56.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.56.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.57.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.57.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.57.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.58.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.58.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.58.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.59.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.59.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.59.up_proj.weight": "model-00157-of-00172.safetensors", + 
"model.layers.49.mlp.experts.60.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.60.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.60.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.61.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.61.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.61.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.62.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.62.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.62.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.63.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.63.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.63.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.64.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.64.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.64.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.65.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.65.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.65.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.66.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.66.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.66.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.67.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.67.gate_proj.weight": 
"model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.67.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.68.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.68.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.68.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.69.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.69.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.69.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.70.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.70.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.70.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.71.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.71.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.71.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.72.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.72.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.72.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.73.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.73.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.73.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.74.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.74.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.74.up_proj.weight": "model-00157-of-00172.safetensors", + 
"model.layers.49.mlp.experts.75.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.75.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.75.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.76.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.76.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.76.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.77.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.77.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.77.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.78.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.78.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.78.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.79.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.79.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.79.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.80.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.80.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.80.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.81.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.81.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.81.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.82.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.82.gate_proj.weight": 
"model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.82.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.83.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.83.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.83.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.84.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.84.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.84.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.85.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.85.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.85.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.86.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.86.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.86.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.87.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.87.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.87.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.88.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.88.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.88.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.89.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.89.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.89.up_proj.weight": "model-00157-of-00172.safetensors", + 
"model.layers.49.mlp.experts.90.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.90.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.90.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.91.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.91.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.91.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.92.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.92.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.92.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.93.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.93.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.93.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.94.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.94.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.94.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.95.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.95.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.95.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.96.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.96.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.96.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.97.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.97.gate_proj.weight": 
"model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.97.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.98.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.98.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.98.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.99.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.99.gate_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.99.up_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.100.down_proj.weight": "model-00157-of-00172.safetensors", + "model.layers.49.mlp.experts.100.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.100.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.101.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.101.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.101.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.102.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.102.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.102.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.103.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.103.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.103.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.104.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.104.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.104.up_proj.weight": "model-00158-of-00172.safetensors", + 
"model.layers.49.mlp.experts.105.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.105.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.105.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.106.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.106.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.106.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.107.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.107.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.107.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.108.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.108.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.108.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.109.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.109.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.109.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.110.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.110.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.110.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.111.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.111.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.111.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.112.down_proj.weight": "model-00158-of-00172.safetensors", + 
"model.layers.49.mlp.experts.112.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.112.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.113.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.113.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.113.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.114.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.114.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.114.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.115.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.115.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.115.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.116.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.116.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.116.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.117.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.117.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.117.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.118.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.118.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.118.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.119.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.119.gate_proj.weight": "model-00158-of-00172.safetensors", + 
"model.layers.49.mlp.experts.119.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.120.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.120.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.120.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.121.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.121.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.121.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.122.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.122.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.122.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.123.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.123.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.123.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.124.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.124.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.124.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.125.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.125.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.125.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.126.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.126.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.126.up_proj.weight": "model-00158-of-00172.safetensors", + 
"model.layers.49.mlp.experts.127.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.127.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.experts.127.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.gate.weight": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.gate.weight_1": "model-00158-of-00172.safetensors", + "model.layers.49.mlp.moe_statics.e_score_correction_bias": "model-00158-of-00172.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00158-of-00172.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.input_layernorm.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.0.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.0.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.0.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.1.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.1.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.1.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.2.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.2.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.2.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.3.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.3.gate_proj.weight": "model-00158-of-00172.safetensors", + 
"model.layers.50.mlp.experts.3.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.4.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.4.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.4.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.5.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.5.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.5.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.6.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.6.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.6.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.7.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.7.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.7.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.8.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.8.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.8.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.9.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.9.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.9.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.10.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.10.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.10.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.11.down_proj.weight": 
"model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.11.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.11.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.12.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.12.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.12.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.13.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.13.gate_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.13.up_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.14.down_proj.weight": "model-00158-of-00172.safetensors", + "model.layers.50.mlp.experts.14.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.14.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.15.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.15.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.15.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.16.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.16.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.16.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.17.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.17.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.17.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.18.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.18.gate_proj.weight": "model-00159-of-00172.safetensors", + 
"model.layers.50.mlp.experts.18.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.19.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.19.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.19.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.20.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.20.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.20.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.21.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.21.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.21.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.22.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.22.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.22.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.23.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.23.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.23.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.24.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.24.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.24.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.25.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.25.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.25.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.26.down_proj.weight": 
"model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.26.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.26.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.27.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.27.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.27.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.28.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.28.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.28.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.29.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.29.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.29.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.30.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.30.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.30.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.31.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.31.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.31.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.32.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.32.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.32.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.33.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.33.gate_proj.weight": "model-00159-of-00172.safetensors", + 
"model.layers.50.mlp.experts.33.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.34.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.34.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.34.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.35.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.35.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.35.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.36.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.36.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.36.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.37.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.37.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.37.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.38.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.38.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.38.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.39.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.39.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.39.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.40.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.40.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.40.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.41.down_proj.weight": 
"model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.41.gate_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.41.up_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.42.down_proj.weight": "model-00159-of-00172.safetensors", + "model.layers.50.mlp.experts.42.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.42.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.43.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.43.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.43.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.44.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.44.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.44.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.45.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.45.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.45.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.46.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.46.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.46.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.47.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.47.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.47.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.48.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.48.gate_proj.weight": "model-00160-of-00172.safetensors", + 
"model.layers.50.mlp.experts.48.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.49.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.49.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.49.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.50.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.50.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.50.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.51.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.51.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.51.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.52.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.52.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.52.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.53.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.53.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.53.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.54.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.54.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.54.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.55.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.55.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.55.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.56.down_proj.weight": 
"model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.56.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.56.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.57.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.57.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.57.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.58.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.58.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.58.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.59.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.59.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.59.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.60.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.60.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.60.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.61.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.61.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.61.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.62.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.62.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.62.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.63.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.63.gate_proj.weight": "model-00160-of-00172.safetensors", + 
"model.layers.50.mlp.experts.63.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.64.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.64.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.64.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.65.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.65.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.65.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.66.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.66.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.66.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.67.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.67.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.67.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.68.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.68.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.68.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.69.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.69.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.69.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.70.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.70.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.70.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.71.down_proj.weight": 
"model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.71.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.71.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.72.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.72.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.72.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.73.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.73.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.73.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.74.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.74.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.74.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.75.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.75.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.75.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.76.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.76.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.76.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.77.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.77.gate_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.77.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.78.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.78.gate_proj.weight": "model-00160-of-00172.safetensors", + 
"model.layers.50.mlp.experts.78.up_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.79.down_proj.weight": "model-00160-of-00172.safetensors", + "model.layers.50.mlp.experts.79.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.79.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.80.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.80.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.80.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.81.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.81.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.81.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.82.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.82.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.82.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.83.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.83.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.83.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.84.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.84.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.84.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.85.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.85.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.85.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.86.down_proj.weight": 
"model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.86.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.86.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.87.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.87.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.87.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.88.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.88.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.88.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.89.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.89.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.89.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.90.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.90.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.90.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.91.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.91.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.91.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.92.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.92.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.92.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.93.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.93.gate_proj.weight": "model-00161-of-00172.safetensors", + 
"model.layers.50.mlp.experts.93.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.94.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.94.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.94.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.95.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.95.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.95.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.96.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.96.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.96.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.97.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.97.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.97.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.98.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.98.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.98.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.99.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.99.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.99.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.100.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.100.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.100.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.101.down_proj.weight": 
"model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.101.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.101.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.102.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.102.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.102.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.103.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.103.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.103.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.104.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.104.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.104.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.105.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.105.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.105.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.106.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.106.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.106.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.107.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.107.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.107.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.108.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.108.gate_proj.weight": 
"model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.108.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.109.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.109.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.109.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.110.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.110.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.110.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.111.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.111.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.111.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.112.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.112.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.112.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.113.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.113.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.113.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.114.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.114.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.114.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.115.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.115.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.115.up_proj.weight": "model-00161-of-00172.safetensors", 
+ "model.layers.50.mlp.experts.116.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.116.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.116.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.117.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.117.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.117.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.118.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.118.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.118.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.119.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.119.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.119.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.120.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.120.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.120.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.121.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.121.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.121.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.122.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.122.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.122.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.123.down_proj.weight": "model-00161-of-00172.safetensors", + 
"model.layers.50.mlp.experts.123.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.123.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.124.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.124.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.124.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.125.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.125.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.125.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.126.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.126.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.126.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.127.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.127.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.experts.127.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.gate.weight": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.gate.weight_1": "model-00161-of-00172.safetensors", + "model.layers.50.mlp.moe_statics.e_score_correction_bias": "model-00161-of-00172.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00161-of-00172.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.input_layernorm.weight": "model-00161-of-00172.safetensors", + 
"model.layers.51.mlp.experts.0.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.0.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.0.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.1.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.1.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.1.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.2.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.2.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.2.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.3.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.3.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.3.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.4.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.4.gate_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.4.up_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.5.down_proj.weight": "model-00161-of-00172.safetensors", + "model.layers.51.mlp.experts.5.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.5.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.6.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.6.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.6.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.7.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.7.gate_proj.weight": 
"model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.7.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.8.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.8.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.8.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.9.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.9.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.9.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.10.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.10.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.10.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.11.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.11.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.11.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.12.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.12.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.12.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.13.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.13.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.13.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.14.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.14.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.14.up_proj.weight": "model-00162-of-00172.safetensors", + 
"model.layers.51.mlp.experts.15.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.15.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.15.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.16.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.16.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.16.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.17.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.17.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.17.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.18.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.18.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.18.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.19.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.19.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.19.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.20.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.20.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.20.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.21.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.21.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.21.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.22.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.22.gate_proj.weight": 
"model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.22.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.23.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.23.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.23.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.24.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.24.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.24.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.25.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.25.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.25.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.26.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.26.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.26.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.27.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.27.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.27.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.28.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.28.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.28.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.29.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.29.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.29.up_proj.weight": "model-00162-of-00172.safetensors", + 
"model.layers.51.mlp.experts.30.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.30.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.30.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.31.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.31.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.31.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.32.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.32.gate_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.32.up_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.33.down_proj.weight": "model-00162-of-00172.safetensors", + "model.layers.51.mlp.experts.33.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.33.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.34.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.34.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.34.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.35.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.35.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.35.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.36.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.36.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.36.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.37.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.37.gate_proj.weight": 
"model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.37.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.38.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.38.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.38.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.39.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.39.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.39.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.40.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.40.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.40.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.41.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.41.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.41.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.42.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.42.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.42.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.43.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.43.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.43.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.44.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.44.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.44.up_proj.weight": "model-00163-of-00172.safetensors", + 
"model.layers.51.mlp.experts.45.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.45.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.45.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.46.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.46.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.46.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.47.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.47.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.47.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.48.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.48.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.48.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.49.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.49.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.49.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.50.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.50.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.50.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.51.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.51.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.51.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.52.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.52.gate_proj.weight": 
"model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.52.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.53.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.53.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.53.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.54.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.54.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.54.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.55.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.55.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.55.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.56.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.56.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.56.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.57.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.57.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.57.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.58.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.58.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.58.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.59.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.59.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.59.up_proj.weight": "model-00163-of-00172.safetensors", + 
"model.layers.51.mlp.experts.60.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.60.gate_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.60.up_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.61.down_proj.weight": "model-00163-of-00172.safetensors", + "model.layers.51.mlp.experts.61.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.61.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.62.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.62.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.62.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.63.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.63.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.63.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.64.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.64.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.64.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.65.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.65.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.65.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.66.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.66.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.66.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.67.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.67.gate_proj.weight": 
"model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.67.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.68.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.68.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.68.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.69.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.69.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.69.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.70.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.70.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.70.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.71.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.71.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.71.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.72.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.72.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.72.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.73.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.73.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.73.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.74.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.74.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.74.up_proj.weight": "model-00164-of-00172.safetensors", + 
"model.layers.51.mlp.experts.75.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.75.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.75.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.76.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.76.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.76.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.77.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.77.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.77.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.78.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.78.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.78.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.79.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.79.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.79.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.80.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.80.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.80.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.81.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.81.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.81.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.82.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.82.gate_proj.weight": 
"model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.82.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.83.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.83.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.83.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.84.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.84.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.84.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.85.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.85.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.85.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.86.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.86.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.86.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.87.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.87.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.87.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.88.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.88.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.88.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.89.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.89.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.89.up_proj.weight": "model-00164-of-00172.safetensors", + 
"model.layers.51.mlp.experts.90.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.90.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.90.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.91.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.91.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.91.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.92.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.92.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.92.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.93.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.93.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.93.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.94.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.94.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.94.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.95.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.95.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.95.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.96.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.96.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.96.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.97.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.97.gate_proj.weight": 
"model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.97.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.98.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.98.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.98.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.99.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.99.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.99.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.100.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.100.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.100.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.101.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.101.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.101.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.102.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.102.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.102.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.103.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.103.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.103.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.104.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.104.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.104.up_proj.weight": "model-00164-of-00172.safetensors", + 
"model.layers.51.mlp.experts.105.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.105.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.105.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.106.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.106.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.106.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.107.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.107.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.107.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.108.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.108.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.108.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.109.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.109.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.109.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.110.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.110.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.110.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.111.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.111.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.111.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.112.down_proj.weight": "model-00164-of-00172.safetensors", + 
"model.layers.51.mlp.experts.112.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.112.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.113.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.113.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.113.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.114.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.114.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.114.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.115.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.115.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.115.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.116.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.116.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.116.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.117.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.117.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.117.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.118.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.118.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.118.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.119.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.119.gate_proj.weight": "model-00164-of-00172.safetensors", + 
"model.layers.51.mlp.experts.119.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.120.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.120.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.120.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.121.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.121.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.121.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.122.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.122.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.122.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.123.down_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.123.gate_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.123.up_proj.weight": "model-00164-of-00172.safetensors", + "model.layers.51.mlp.experts.124.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.124.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.124.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.125.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.125.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.125.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.126.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.126.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.126.up_proj.weight": "model-00165-of-00172.safetensors", + 
"model.layers.51.mlp.experts.127.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.127.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.experts.127.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.gate.weight": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.gate.weight_1": "model-00165-of-00172.safetensors", + "model.layers.51.mlp.moe_statics.e_score_correction_bias": "model-00165-of-00172.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00165-of-00172.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.input_layernorm.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.0.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.0.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.0.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.1.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.1.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.1.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.2.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.2.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.2.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.3.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.3.gate_proj.weight": "model-00165-of-00172.safetensors", + 
"model.layers.52.mlp.experts.3.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.4.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.4.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.4.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.5.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.5.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.5.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.6.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.6.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.6.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.7.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.7.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.7.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.8.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.8.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.8.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.9.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.9.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.9.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.10.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.10.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.10.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.11.down_proj.weight": 
"model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.11.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.11.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.12.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.12.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.12.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.13.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.13.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.13.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.14.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.14.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.14.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.15.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.15.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.15.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.16.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.16.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.16.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.17.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.17.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.17.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.18.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.18.gate_proj.weight": "model-00165-of-00172.safetensors", + 
"model.layers.52.mlp.experts.18.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.19.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.19.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.19.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.20.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.20.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.20.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.21.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.21.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.21.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.22.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.22.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.22.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.23.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.23.gate_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.23.up_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.24.down_proj.weight": "model-00165-of-00172.safetensors", + "model.layers.52.mlp.experts.24.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.24.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.25.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.25.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.25.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.26.down_proj.weight": 
"model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.26.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.26.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.27.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.27.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.27.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.28.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.28.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.28.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.29.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.29.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.29.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.30.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.30.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.30.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.31.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.31.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.31.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.32.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.32.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.32.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.33.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.33.gate_proj.weight": "model-00166-of-00172.safetensors", + 
"model.layers.52.mlp.experts.33.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.34.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.34.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.34.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.35.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.35.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.35.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.36.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.36.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.36.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.37.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.37.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.37.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.38.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.38.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.38.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.39.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.39.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.39.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.40.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.40.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.40.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.41.down_proj.weight": 
"model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.41.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.41.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.42.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.42.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.42.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.43.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.43.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.43.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.44.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.44.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.44.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.45.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.45.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.45.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.46.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.46.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.46.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.47.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.47.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.47.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.48.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.48.gate_proj.weight": "model-00166-of-00172.safetensors", + 
"model.layers.52.mlp.experts.48.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.49.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.49.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.49.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.50.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.50.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.50.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.51.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.51.gate_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.51.up_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.52.down_proj.weight": "model-00166-of-00172.safetensors", + "model.layers.52.mlp.experts.52.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.52.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.53.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.53.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.53.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.54.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.54.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.54.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.55.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.55.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.55.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.56.down_proj.weight": 
"model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.56.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.56.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.57.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.57.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.57.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.58.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.58.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.58.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.59.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.59.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.59.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.60.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.60.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.60.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.61.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.61.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.61.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.62.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.62.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.62.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.63.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.63.gate_proj.weight": "model-00167-of-00172.safetensors", + 
"model.layers.52.mlp.experts.63.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.64.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.64.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.64.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.65.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.65.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.65.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.66.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.66.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.66.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.67.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.67.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.67.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.68.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.68.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.68.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.69.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.69.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.69.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.70.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.70.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.70.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.71.down_proj.weight": 
"model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.71.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.71.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.72.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.72.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.72.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.73.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.73.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.73.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.74.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.74.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.74.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.75.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.75.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.75.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.76.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.76.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.76.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.77.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.77.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.77.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.78.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.78.gate_proj.weight": "model-00167-of-00172.safetensors", + 
"model.layers.52.mlp.experts.78.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.79.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.79.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.79.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.80.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.80.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.80.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.81.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.81.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.81.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.82.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.82.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.82.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.83.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.83.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.83.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.84.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.84.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.84.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.85.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.85.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.85.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.86.down_proj.weight": 
"model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.86.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.86.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.87.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.87.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.87.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.88.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.88.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.88.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.89.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.89.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.89.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.90.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.90.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.90.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.91.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.91.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.91.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.92.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.92.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.92.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.93.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.93.gate_proj.weight": "model-00167-of-00172.safetensors", + 
"model.layers.52.mlp.experts.93.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.94.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.94.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.94.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.95.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.95.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.95.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.96.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.96.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.96.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.97.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.97.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.97.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.98.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.98.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.98.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.99.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.99.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.99.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.100.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.100.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.100.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.101.down_proj.weight": 
"model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.101.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.101.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.102.down_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.102.gate_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.102.up_proj.weight": "model-00167-of-00172.safetensors", + "model.layers.52.mlp.experts.103.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.103.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.103.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.104.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.104.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.104.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.105.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.105.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.105.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.106.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.106.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.106.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.107.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.107.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.107.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.108.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.108.gate_proj.weight": 
"model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.108.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.109.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.109.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.109.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.110.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.110.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.110.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.111.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.111.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.111.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.112.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.112.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.112.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.113.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.113.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.113.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.114.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.114.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.114.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.115.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.115.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.115.up_proj.weight": "model-00168-of-00172.safetensors", 
+ "model.layers.52.mlp.experts.116.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.116.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.116.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.117.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.117.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.117.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.118.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.118.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.118.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.119.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.119.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.119.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.120.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.120.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.120.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.121.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.121.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.121.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.122.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.122.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.122.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.123.down_proj.weight": "model-00168-of-00172.safetensors", + 
"model.layers.52.mlp.experts.123.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.123.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.124.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.124.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.124.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.125.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.125.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.125.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.126.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.126.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.126.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.127.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.127.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.experts.127.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.gate.weight": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.gate.weight_1": "model-00168-of-00172.safetensors", + "model.layers.52.mlp.moe_statics.e_score_correction_bias": "model-00168-of-00172.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00168-of-00172.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.input_layernorm.weight": "model-00168-of-00172.safetensors", + 
"model.layers.53.mlp.experts.0.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.0.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.0.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.1.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.1.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.1.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.2.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.2.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.2.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.3.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.3.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.3.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.4.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.4.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.4.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.5.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.5.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.5.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.6.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.6.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.6.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.7.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.7.gate_proj.weight": 
"model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.7.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.8.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.8.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.8.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.9.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.9.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.9.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.10.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.10.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.10.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.11.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.11.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.11.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.12.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.12.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.12.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.13.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.13.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.13.up_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.14.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.14.gate_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.14.up_proj.weight": "model-00168-of-00172.safetensors", + 
"model.layers.53.mlp.experts.15.down_proj.weight": "model-00168-of-00172.safetensors", + "model.layers.53.mlp.experts.15.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.15.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.16.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.16.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.16.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.17.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.17.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.17.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.18.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.18.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.18.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.19.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.19.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.19.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.20.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.20.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.20.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.21.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.21.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.21.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.22.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.22.gate_proj.weight": 
"model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.22.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.23.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.23.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.23.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.24.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.24.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.24.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.25.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.25.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.25.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.26.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.26.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.26.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.27.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.27.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.27.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.28.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.28.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.28.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.29.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.29.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.29.up_proj.weight": "model-00169-of-00172.safetensors", + 
"model.layers.53.mlp.experts.30.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.30.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.30.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.31.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.31.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.31.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.32.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.32.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.32.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.33.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.33.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.33.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.34.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.34.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.34.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.35.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.35.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.35.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.36.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.36.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.36.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.37.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.37.gate_proj.weight": 
"model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.37.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.38.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.38.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.38.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.39.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.39.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.39.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.40.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.40.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.40.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.41.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.41.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.41.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.42.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.42.gate_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.42.up_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.43.down_proj.weight": "model-00169-of-00172.safetensors", + "model.layers.53.mlp.experts.43.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.43.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.44.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.44.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.44.up_proj.weight": "model-00170-of-00172.safetensors", + 
"model.layers.53.mlp.experts.45.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.45.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.45.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.46.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.46.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.46.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.47.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.47.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.47.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.48.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.48.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.48.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.49.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.49.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.49.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.50.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.50.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.50.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.51.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.51.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.51.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.52.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.52.gate_proj.weight": 
"model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.52.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.53.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.53.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.53.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.54.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.54.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.54.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.55.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.55.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.55.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.56.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.56.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.56.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.57.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.57.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.57.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.58.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.58.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.58.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.59.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.59.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.59.up_proj.weight": "model-00170-of-00172.safetensors", + 
"model.layers.53.mlp.experts.60.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.60.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.60.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.61.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.61.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.61.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.62.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.62.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.62.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.63.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.63.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.63.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.64.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.64.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.64.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.65.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.65.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.65.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.66.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.66.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.66.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.67.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.67.gate_proj.weight": 
"model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.67.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.68.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.68.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.68.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.69.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.69.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.69.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.70.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.70.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.70.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.71.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.71.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.71.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.72.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.72.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.72.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.73.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.73.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.73.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.74.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.74.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.74.up_proj.weight": "model-00170-of-00172.safetensors", + 
"model.layers.53.mlp.experts.75.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.75.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.75.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.76.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.76.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.76.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.77.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.77.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.77.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.78.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.78.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.78.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.79.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.79.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.79.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.80.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.80.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.80.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.81.down_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.81.gate_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.81.up_proj.weight": "model-00170-of-00172.safetensors", + "model.layers.53.mlp.experts.82.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.82.gate_proj.weight": 
"model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.82.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.83.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.83.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.83.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.84.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.84.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.84.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.85.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.85.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.85.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.86.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.86.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.86.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.87.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.87.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.87.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.88.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.88.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.88.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.89.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.89.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.89.up_proj.weight": "model-00171-of-00172.safetensors", + 
"model.layers.53.mlp.experts.90.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.90.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.90.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.91.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.91.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.91.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.92.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.92.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.92.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.93.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.93.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.93.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.94.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.94.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.94.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.95.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.95.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.95.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.96.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.96.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.96.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.97.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.97.gate_proj.weight": 
"model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.97.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.98.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.98.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.98.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.99.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.99.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.99.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.100.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.100.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.100.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.101.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.101.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.101.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.102.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.102.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.102.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.103.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.103.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.103.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.104.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.104.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.104.up_proj.weight": "model-00171-of-00172.safetensors", + 
"model.layers.53.mlp.experts.105.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.105.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.105.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.106.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.106.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.106.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.107.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.107.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.107.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.108.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.108.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.108.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.109.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.109.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.109.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.110.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.110.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.110.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.111.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.111.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.111.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.112.down_proj.weight": "model-00171-of-00172.safetensors", + 
"model.layers.53.mlp.experts.112.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.112.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.113.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.113.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.113.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.114.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.114.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.114.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.115.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.115.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.115.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.116.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.116.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.116.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.117.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.117.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.117.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.118.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.118.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.118.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.119.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.119.gate_proj.weight": "model-00171-of-00172.safetensors", + 
"model.layers.53.mlp.experts.119.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.120.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.120.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.120.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.121.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.121.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.121.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.122.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.122.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.122.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.123.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.123.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.123.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.124.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.124.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.124.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.125.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.125.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.125.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.126.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.126.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.126.up_proj.weight": "model-00171-of-00172.safetensors", + 
"model.layers.53.mlp.experts.127.down_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.127.gate_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.experts.127.up_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.gate.weight": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.gate.weight_1": "model-00171-of-00172.safetensors", + "model.layers.53.mlp.moe_statics.e_score_correction_bias": "model-00171-of-00172.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00171-of-00172.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00171-of-00172.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00171-of-00172.safetensors", + "model.norm.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.after_norm.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.mlp.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.mlp.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.0.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.0.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.2.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.2.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.3.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.spatial_linear.3.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.temporal_linear.0.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.temporal_linear.0.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.temporal_linear.2.bias": "model-00171-of-00172.safetensors", + 
"model.resampler_model.temporal_linear.2.weight": "model-00171-of-00172.safetensors", + "model.resampler_model.temporal_linear.3.bias": "model-00171-of-00172.safetensors", + "model.resampler_model.temporal_linear.3.weight": "model-00171-of-00172.safetensors", + "lm_head.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.0.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.norm1.weight": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.1.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.1.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.2.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.3.norm2.weight": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.4.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.4.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.5.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.attn.proj.bias": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.6.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.6.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.7.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.attn.qkv.bias": "model-00172-of-00172.safetensors", 
+ "vision_model.blocks.8.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.8.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.9.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.mlp.fc1.bias": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.10.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.10.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.11.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.mlp.fc1.weight": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.12.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.12.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.13.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.mlp.fc2.weight": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.14.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.14.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.15.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.norm1.bias": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.16.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.16.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.17.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.18.norm2.bias": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.18.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.19.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.20.norm2.weight": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.21.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.21.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.22.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.attn.proj.weight": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.23.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.23.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.24.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.attn.qkv.bias": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.25.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.25.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.26.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.mlp.fc1.bias": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.27.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.27.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.28.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.mlp.fc1.weight": "model-00172-of-00172.safetensors", + 
"vision_model.blocks.29.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.29.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.mlp.fc2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.30.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.attn.proj.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.attn.proj.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.attn.qkv.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.attn.qkv.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.mlp.fc1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.mlp.fc1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.mlp.fc2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.mlp.fc2.weight": 
"model-00172-of-00172.safetensors", + "vision_model.blocks.31.norm1.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.norm1.weight": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.norm2.bias": "model-00172-of-00172.safetensors", + "vision_model.blocks.31.norm2.weight": "model-00172-of-00172.safetensors", + "vision_model.ln.bias": "model-00172-of-00172.safetensors", + "vision_model.ln.weight": "model-00172-of-00172.safetensors", + "vision_model.patch_embed.proj.weight": "model-00172-of-00172.safetensors" + } +} \ No newline at end of file diff --git a/modeling_ernie_45t_vl.py b/modeling_ernie_45t_vl.py new file mode 100644 index 0000000000000000000000000000000000000000..4cd479db56ed29df04384590ad44516564c28509 --- /dev/null +++ b/modeling_ernie_45t_vl.py @@ -0,0 +1,4210 @@ +# Copyright (c) 2025 Baidu, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class UniqueNameGuard:
    """
    Context manager that generates unique, prefixed names.

    Each distinct ``name`` gets its own zero-based counter; the n-th request
    for the same name yields ``"{prefix}{name}_{n}"``.

    Args:
        prefix (str, optional): String prepended to every generated name.
            Defaults to "".
    """

    def __init__(self, prefix=""):
        self.prefix = prefix
        # Maps a base name to the last index handed out for it.
        self.counter = {}

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release; exceptions raised inside the block propagate.
        return None

    def get_unique_name(self, name):
        """
        Return the next unused name for ``name``.

        Args:
            name (str): Base name to make unique.

        Returns:
            str: ``prefix + name + "_" + index`` where index starts at 0 and
            increases by one on each call with the same ``name``.
        """
        index = self.counter.get(name, -1) + 1
        self.counter[name] = index
        return f"{self.prefix}{name}_{index}"
class RopeEmbedding(nn.Module):
    """
    Rotary Position Embedding (RoPE) with an additional 3D (t/h/w) variant.

    ``forward`` builds a table holding ``sin`` in the first half of the last
    dimension and ``cos`` in the second half. ``apply_rotary`` rotates
    queries/keys with that table directly; ``apply_rotary_3d`` first gathers
    per-token rows of the table using separate time/height/width positions.

    Args:
        head_dim (int): Dimension size of each attention head.
        compression_ratio (float, optional): Position indices are divided by
            this value before the angles are computed. Defaults to 1.0.
        base (int, optional): Base value for frequency calculation.
            Defaults to 10000.
        freq_allocation (int, optional): Number of rotary frequencies (taken
            from the end of the frequency axis) driven by the time position
            in ``apply_rotary_3d``. Defaults to 0.

    Attributes:
        head_dim (int): Dimension size of each attention head.
        compression_ratio (float): Sequence length compression factor.
        base (int): Base value for frequency calculation.
        freq_allocation (int): Number of frequencies allocated to time.
    """

    def __init__(self, head_dim, compression_ratio=1.0, base=10000, freq_allocation=0):
        """
        Initialize RoPE embedding layer.

        Args:
            head_dim: Dimension of each attention head
            compression_ratio: Scaling factor for position indices
            base: Base value for frequency calculation
            freq_allocation: Number of frequencies allocated to time
        """
        super().__init__()
        self.head_dim = head_dim
        self.compression_ratio = compression_ratio
        self.base = base

        # num of freq allocated to time
        self.freq_allocation = freq_allocation

    def forward(self, seq_length, position_ids=None):
        """
        Compute the sin/cos table.

        Args:
            seq_length (int or 0-dim Tensor): Number of positions to generate
                when ``position_ids`` is None; ignored otherwise.
            position_ids (Tensor, optional): Custom position indices whose
                last dimension is the sequence axis. Defaults to None.

        Returns:
            Tensor: Detached float32 tensor viewed as
            ``[-1, 1, seq_length, head_dim]``; the last dimension is
            ``[sin(angles), cos(angles)]`` concatenated.
        """
        # Build the frequencies next to the positions so the product below
        # never mixes devices.
        device = None if position_ids is None else position_ids.device
        inv_freq = torch.arange(
            0, self.head_dim, 2, dtype=torch.float32, device=device
        )
        inv_freq = 1 / self.base ** (inv_freq / self.head_dim)
        if position_ids is None:
            # Callers may pass `position_ids.max() + 1`, i.e. a 0-dim tensor.
            seq_length = int(seq_length)
            position_ids = torch.arange(
                0, seq_length, 1, dtype=torch.float32
            ).unsqueeze(1)
            position_ids = position_ids / self.compression_ratio
            sinusoid_inp = position_ids * inv_freq.unsqueeze(0)
        else:
            position_ids = position_ids / self.compression_ratio
            seq_length = position_ids.shape[-1]
            sinusoid_inp = position_ids.unsqueeze(-1).to(
                torch.float32
            ) * inv_freq.unsqueeze(0)
        pos_emb = torch.cat([torch.sin(sinusoid_inp), torch.cos(sinusoid_inp)], dim=-1)
        pos_emb = pos_emb.view(-1, 1, seq_length, self.head_dim)
        return pos_emb.detach()

    @staticmethod
    def _rotate_half(x):
        """Map [x0, x1, x2, x3, ...] to [-x1, x0, -x3, x2, ...] on the last dim."""
        return torch.stack([-x[..., 1::2], x[..., 0::2]], dim=-1).reshape(x.shape)

    @staticmethod
    def _rotate(x, cos_pos, sin_pos):
        """Apply the rotation ``x * cos + rotate_half(x) * sin`` in float32."""
        rotated = RopeEmbedding._rotate_half(x)
        return (x.to(torch.float32) * cos_pos) + (rotated.to(torch.float32) * sin_pos)

    def _gather_thw(self, table, batch_indices, position_ids):
        """Pick per-token t/h/w rows of ``table`` and duplicate each frequency."""
        hw_end = self.head_dim // 2 - self.freq_allocation
        t_part = table[batch_indices, position_ids[..., 0], :, -self.freq_allocation :]
        h_part = table[batch_indices, position_ids[..., 1], :, :hw_end:2]
        w_part = table[batch_indices, position_ids[..., 2], :, 1:hw_end:2]
        # Interleave height and width frequencies: [h0, w0, h1, w1, ...].
        hw_part = torch.stack([h_part, w_part], dim=-1).reshape(
            h_part.shape[:-1] + (h_part.shape[-1] * 2,)
        )
        thw = torch.cat([hw_part, t_part], dim=-1)
        # [θ0,θ1,...θd/2-1] -> [θ0,θ0,θ1,θ1,...θd/2-1,θd/2-1]
        return torch.stack([thw, thw], dim=-1).reshape(
            thw.shape[:3] + (thw.shape[-1] * 2,)
        )

    def apply_rotary(self, rp, q, k):
        """
        Apply rotary position embeddings to queries and keys.

        Args:
            rp (Tensor): Table produced by ``forward``; must broadcast
                against ``q`` and ``k``.
            q (Tensor): Query tensor whose last dimension is ``head_dim``.
            k (Tensor): Key tensor whose last dimension is ``head_dim``.

        Returns:
            Tuple[Tensor, Tensor]: Rotated queries and keys in float32.
        """
        sin, cos = torch.chunk(rp, 2, dim=-1)
        # sin [θ0,θ1,θ2......θd/2-1] -> sin_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1]
        sin_pos = torch.stack([sin, sin], dim=-1).reshape(rp.shape)
        # cos [θ0,θ1,θ2......θd/2-1] -> cos_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1]
        cos_pos = torch.stack([cos, cos], dim=-1).reshape(rp.shape)
        query = self._rotate(q, cos_pos, sin_pos)
        key = self._rotate(k, cos_pos, sin_pos)
        return query, key

    def apply_rotary_3d(self, rp, q, k, position_ids):
        """
        Apply 3D (time/height/width) rotary embeddings to queries and keys.

        Args:
            rp: [1, max_seqlen, 1, head_dim] table (``forward`` output with
                dims 1 and 2 swapped)
            q: [bsz, seqlen, head, head_dim]
            k: [bsz, seqlen, head, head_dim]
            position_ids: [bsz, seqlen, 3] integer (t, h, w) indices into the
                ``max_seqlen`` axis of ``rp``

        Returns:
            Tuple[Tensor, Tensor]: Rotated queries and keys in float32, on
            the device of ``q``.
        """
        current_device = q.device
        assert position_ids.shape[:1] == q.shape[:1]
        assert self.freq_allocation != 0

        batch_size = position_ids.shape[0]
        index_device = position_ids.device
        batch_indices = torch.arange(batch_size, device=index_device)[..., None]

        sin, cos = torch.chunk(rp, 2, dim=-1)
        sin = sin.to(device=index_device).tile(batch_size, 1, 1, 1)
        cos = cos.to(device=index_device).tile(batch_size, 1, 1, 1)

        sin_pos = self._gather_thw(sin, batch_indices, position_ids).to(current_device)
        cos_pos = self._gather_thw(cos, batch_indices, position_ids).to(current_device)

        query = self._rotate(q, cos_pos, sin_pos)
        key = self._rotate(k, cos_pos, sin_pos)
        return query, key
class Ernie4_5_MLP(nn.Module):
    """
    Gated feed-forward block: ``down_proj(silu(gate_proj(x)) * up_proj(x))``.
    """

    def __init__(self, config, layer_idx=0):
        """
        Build the three linear projections.

        Args:
            config (Ernie4_5_Config): Model configuration; ``hidden_size``,
                ``intermediate_size`` and ``use_bias`` are read.
            layer_idx (int): Index of current layer (default: 0). Not used
                by this class.
        """
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
        self.intermediate_size = config.intermediate_size

        hidden, inner, bias = self.hidden_size, self.intermediate_size, config.use_bias
        # Creation order (gate, up, down) is kept so random init is unchanged.
        self.gate_proj = nn.Linear(hidden, inner, bias=bias)
        self.up_proj = nn.Linear(hidden, inner, bias=bias)
        self.down_proj = nn.Linear(inner, hidden, bias=bias)

    def forward(self, x):
        """
        Run the gated MLP.

        Args:
            x (Tensor): Input whose last dimension is ``hidden_size``.

        Returns:
            Tensor: Output with the same leading dimensions and a last
            dimension of ``hidden_size``.
        """
        # Move the input to wherever this layer's weights live.
        x = x.to(self.gate_proj.weight.data.device)
        gated = F.silu(self.gate_proj(x))
        projected = self.up_proj(x)
        return self.down_proj(gated * projected)
class Ernie4_5_Attention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(self, config, layer_idx=0):
        """Initialize the attention layer.

        Args:
            config (Ernie4_5_Config): Model configuration.
            layer_idx (int, optional): Index in transformer stack. Defaults to 0.
        """
        super().__init__()
        self.layer_idx = layer_idx
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.num_key_value_heads = config.num_key_value_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.is_gqa = (
            self.num_key_value_heads is not None
            and self.num_key_value_heads != self.num_heads
        )

        self.freq_allocation = getattr(config, "freq_allocation", 0)
        assert (
            self.freq_allocation is not None
        ), "freq_allocation must be provided if rope_3d is on."

        if config.tensor_parallel_degree > 1:
            assert (
                self.num_heads % config.tensor_parallel_degree == 0
            ), f"num_heads: {self.num_heads}, tensor_parallel_degree: {config.tensor_parallel_degree}"
            self.num_heads = self.num_heads // config.tensor_parallel_degree
            if self.is_gqa:
                assert (
                    self.num_key_value_heads % config.tensor_parallel_degree == 0
                ), f"num_heads: {self.num_key_value_heads}, tensor_parallel_degree: {config.tensor_parallel_degree}"
                self.num_key_value_heads = (
                    self.num_key_value_heads // config.tensor_parallel_degree
                )
        q_hidden_size = self.head_dim * self.num_heads
        if self.is_gqa:
            logger.info(
                f"use GQA - num_heads: {self.num_heads}- num_key_value_heads: {self.num_key_value_heads}"
            )
            assert (
                self.num_heads % self.num_key_value_heads == 0
            ), f"num_heads: {self.num_heads}, num_key_value_heads: {self.num_key_value_heads}"
            kv_hidden_size = self.head_dim * self.num_key_value_heads
        else:
            kv_hidden_size = self.head_dim * self.num_heads

        self.q_proj = nn.Linear(self.hidden_size, q_hidden_size, bias=config.use_bias)
        self.k_proj = nn.Linear(self.hidden_size, kv_hidden_size, bias=config.use_bias)
        self.v_proj = nn.Linear(self.hidden_size, kv_hidden_size, bias=config.use_bias)

        # NOTE(review): the input width is hidden_size, not q_hidden_size; the
        # two only coincide when tensor_parallel_degree == 1 — confirm intent.
        self.o_proj = nn.Linear(
            self.hidden_size,
            self.hidden_size,
            bias=config.use_bias,
        )

        self.rotary_emb = RopeEmbedding(
            self.head_dim,
            compression_ratio=config.compression_ratio,
            base=config.rope_theta,
            freq_allocation=self.freq_allocation,
        )
        self.config = config
        self.attn_func = self.core_attn

    def forward(
        self,
        hidden_states,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        attn_mask_start_row_indices: Optional[torch.Tensor] = None,
        position_ids: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        token_type_ids: Optional[Tuple[torch.Tensor]] = None,  # MLLM
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        """Compute attention outputs.

        Args:
            hidden_states (torch.Tensor): Input tensor [bsz, seq_len, hidden_size]
            past_key_value (Optional[Tuple[torch.Tensor, torch.Tensor]]): Cached key/value states
            attention_mask (Optional[torch.Tensor]): Attention mask tensor
            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length attention indices
            position_ids (Optional[torch.Tensor]): Position indices for RoPE
            output_attentions (bool): Return attention weights if True
            use_cache (bool): Cache key/value states if True
            token_type_ids (Optional[torch.Tensor]): Accepted for interface
                compatibility; not used by this layer.

        Returns:
            Tuple containing:
                - attention_output: [bsz, seq_len, hidden_size]
                - attention_weights: Optional attention probabilities
                - updated_key_value_cache: Optional updated cache
        """
        bsz, q_len, _ = hidden_states.shape
        head_shape = [bsz, q_len, -1, self.head_dim]
        query_states = self.q_proj(hidden_states).reshape(head_shape)
        key_states = self.k_proj(hidden_states).reshape(head_shape)
        value_states = self.v_proj(hidden_states).reshape(head_shape)

        attn_output, attn_weights, past_key_value = self.rope_attn(
            query_states=query_states,
            key_states=key_states,
            value_states=value_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            output_attentions=output_attentions,
            past_key_value=past_key_value,
            use_cache=use_cache,
            attn_mask_start_row_indices=attn_mask_start_row_indices,
        )
        attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value

    def repeat_kv(self, hidden_states, n_rep):
        """
        This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep).
        The hidden states go from (batch, num_key_value_heads, seqlen, head_dim)
        to (batch, num_attention_heads, seqlen, head_dim)
        """
        batch, num_key_value_heads, slen, head_dim = hidden_states.shape
        if n_rep == 1:
            return hidden_states
        hidden_states = hidden_states[:, :, None, :, :].expand(
            batch, num_key_value_heads, n_rep, slen, head_dim
        )
        return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)

    def core_attn(
        self,
        q,
        k,
        v,
        attention_mask=None,
        attn_mask_start_row_indices=None,
        seq_length=None,
    ):
        """Standard self-attention implementation.

        Args:
            q (torch.Tensor): Query tensor [bsz, q_len, num_heads, head_dim]
            k (torch.Tensor): Key tensor [bsz, kv_len, num_kv_heads, head_dim]
            v (torch.Tensor): Value tensor [bsz, kv_len, num_kv_heads, head_dim]
            attention_mask (Optional[torch.Tensor]): Additive mask; it is
                unsqueezed at dims 1 and 2 and added to the scores. When
                None, a causal mask is applied instead.
            attn_mask_start_row_indices (Optional[torch.Tensor]): Unused here
            seq_length (Optional[int]): Unused here

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Attention output
            [bsz, q_len, num_heads * head_dim] and attention weights
            [bsz, num_heads, q_len, kv_len]
        """
        origin_dtype = q.dtype

        # [bsz, seq, heads, dim] -> [bsz, heads, seq, dim]
        q = q.permute(0, 2, 1, 3)
        k = k.permute(0, 2, 1, 3)
        v = v.permute(0, 2, 1, 3)

        qk_coeff = getattr(self.config, "scale_qk_coeff", 1.0)
        # Queries are divided by the extra coefficient here and the scores are
        # multiplied back by it after the float32 upcast below.
        q = q / (qk_coeff * (self.head_dim**0.5))

        # Handle GQA case - repeat k and v heads to match q heads
        if self.is_gqa:
            # [batch, num_key_value_heads, seq_len, head_dim] -> [batch, num_heads, seq_len, head_dim]
            repeat_factor = self.num_heads // self.num_key_value_heads
            k = self.repeat_kv(k, repeat_factor)
            v = self.repeat_kv(v, repeat_factor)

        product = torch.matmul(q, k.transpose(-2, -1)).to(torch.float32)
        if qk_coeff != 1.0:
            product = product * qk_coeff

        if attention_mask is not None:
            attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
            product = product + attention_mask.to(torch.float32)
        else:
            # Causal mask aligned to the end of the key sequence: query row i
            # sees keys up to (kv_len - q_len + i). With q_len == kv_len this
            # is the usual strict upper triangle; with cached keys
            # (q_len < kv_len) the new queries still see the whole cache.
            q_len, kv_len = product.shape[-2:]
            mask = torch.triu(
                torch.ones((q_len, kv_len), dtype=torch.bool, device=product.device),
                diagonal=kv_len - q_len + 1,
            )
            product = product.masked_fill(mask, float("-inf"))
        weights = F.softmax(product, dim=-1).to(origin_dtype)

        dropout_prob = getattr(self.config, "attention_probs_dropout_prob", 0.0)
        if dropout_prob > 0:
            weights = F.dropout(weights, dropout_prob, training=self.training)

        out = torch.matmul(weights, v)

        # combine heads
        out = out.permute(0, 2, 1, 3)
        out = out.contiguous().view(out.size(0), out.size(1), -1)

        return out, weights

    def rope_attn(
        self,
        query_states,
        key_states,
        value_states,
        attention_mask,
        position_ids,
        output_attentions=False,
        past_key_value=None,
        use_cache=False,
        attn_mask_start_row_indices=None,
    ):
        """Attention computation with rotary embeddings.

        Args:
            query_states (torch.Tensor): Query states [bsz, q_len, heads, head_dim]
            key_states (torch.Tensor): Key states [bsz, q_len, kv_heads, head_dim]
            value_states (torch.Tensor): Value states [bsz, q_len, kv_heads, head_dim]
            attention_mask (Optional[torch.Tensor]): Attention mask
            position_ids (torch.Tensor): 3D position indices; required
            output_attentions (bool): Unused here; the caller drops the weights
            past_key_value (Optional[Tuple[torch.Tensor, torch.Tensor]]): Cached states
            use_cache (bool): Cache new states
            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length indices

        Returns:
            Tuple containing:
                - attention_output: Result tensor
                - attention_weights: Attention probabilities
                - updated_key_value_cache: ``[key, value]`` if ``use_cache`` else None
        """
        query_states_dtype = query_states.dtype

        assert position_ids is not None, "rope3d requires pos-id"
        # The table must cover the largest position index referenced.
        kv_seq_len = int(position_ids.max()) + 1
        if past_key_value is not None:
            # With a cache, only the newest position is rotated.
            position_ids = position_ids[:, -1:, :]

        cos_sin = self.rotary_emb(kv_seq_len).permute([0, 2, 1, 3])
        query_states, key_states = self.rotary_emb.apply_rotary_3d(
            cos_sin, query_states, key_states, position_ids
        )

        # apply_rotary_3d computes in float32; restore the working dtype.
        query_states = query_states.to(query_states_dtype)
        key_states = key_states.to(query_states_dtype)
        if past_key_value is not None:
            # reuse k, v, self_attention
            key_states = torch.cat([past_key_value[0], key_states], dim=1)
            value_states = torch.cat([past_key_value[1], value_states], dim=1)

        # shape: [2, b, s, kvh, d]
        past_key_value = [key_states, value_states] if use_cache else None
        seq_length = query_states.shape[1]
        attn_output, attn_weights = self.attn_func(
            query_states,
            key_states,
            value_states,
            attention_mask,
            attn_mask_start_row_indices,
            seq_length,
        )

        return attn_output, attn_weights, past_key_value
class FusedDropoutImpl(nn.Module):
    """
    Dropout followed by a residual addition.

    Computes ``dropout(x) + y``; the dropout module is bypassed entirely when
    the probability is not positive.

    Args:
        prob (float): Dropout probability (between 0 and 1)
        mode (str): Dropout mode, either 'upscale_in_train' or
            'downscale_in_infer'. Recorded for reference only: the underlying
            ``nn.Dropout`` is constructed with ``p=prob`` alone, so this value
            does not change the computation.

    Attributes:
        prob (float): Stores the dropout probability
        mode (str): Stores the requested dropout mode
        dropout (nn.Dropout): The actual dropout layer instance
    """

    def __init__(self, prob, mode):
        """
        Initialize the fused dropout layer.

        Args:
            prob (float): Dropout probability (0 means no dropout)
            mode (str): Dropout mode ('upscale_in_train' or 'downscale_in_infer')
        """
        super().__init__()
        self.prob = prob
        # Kept so the documented attribute exists; not consulted in forward.
        self.mode = mode
        self.dropout = nn.Dropout(p=prob)

    def forward(self, x, y):
        """
        Forward pass of the fused dropout layer.

        Args:
            x (Tensor): Input tensor to potentially apply dropout on
            y (Tensor): Residual tensor to add to the (possibly dropped out) x

        Returns:
            Tensor: Result of x (with optional dropout) + y
        """
        if self.prob > 0:
            x = self.dropout(x)
        return x + y
class RMSNorm(nn.Module):
    """
    Root Mean Square Layer Normalization (RMSNorm).

    Scales the input by the reciprocal root-mean-square of its last dimension
    (no mean-centering) and then by a learned per-feature weight.
    """

    def __init__(self, config):
        """
        Initialize RMSNorm layer.

        Args:
            config (Ernie4_5_Config): Model configuration; ``hidden_size``
                and ``rms_norm_eps`` are read.
        """
        super().__init__()
        self.hidden_size = config.hidden_size
        initial_scale = torch.ones(self.hidden_size, dtype=torch.get_default_dtype())
        self.weight = nn.Parameter(initial_scale)
        self.variance_epsilon = config.rms_norm_eps

    def forward(self, hidden_states):
        """
        Apply RMS normalization to input hidden states.

        Args:
            hidden_states (Tensor): Input tensor whose last dimension is
                ``hidden_size``

        Returns:
            Tensor: Normalized tensor of the same shape, in the dtype of
            ``self.weight``

        Note:
            The mean of squares is computed on a float32 copy of the input;
            the result is cast to the weight dtype before the final scaling.
        """
        mean_square = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        inv_rms = torch.rsqrt(mean_square + self.variance_epsilon)
        normalized = inv_rms * hidden_states
        return normalized.to(self.weight.dtype) * self.weight
class Ernie4_5_MoeMLP(Ernie4_5_MLP):
    """Mixture of Experts (MoE) variant of ERNIE's MLP layer."""

    def __init__(self, config, layer_idx=0):
        """Initialize the MoE MLP layer.

        Args:
            config (Ernie4_5_MoEConfig): Configuration for MoE architecture.
                When ``disable_ffn_model_parallel`` is truthy, a deep copy
                with ``tensor_parallel_degree = 1`` is used so the caller's
                config is left untouched.
            layer_idx (int): Index of current layer in transformer stack
        """

        if getattr(config, "disable_ffn_model_parallel", False):
            config = deepcopy(config)
            config.tensor_parallel_degree = 1

        super().__init__(config, layer_idx=layer_idx)
        self.moe_dropout_prob = config.moe_dropout_prob

    def forward(self, x):
        """Forward pass through MoE MLP layer.

        Args:
            x (torch.Tensor): Input tensor of shape [batch_size, seq_len, hidden_size]
                or [seq_len, hidden_size]

        Returns:
            torch.Tensor: Output tensor with same shape as input
        """
        current_device = self.gate_proj.weight.data.device
        x = x.to(current_device)
        x = F.silu(self.gate_proj(x)) * self.up_proj(x)
        if self.moe_dropout_prob > 0:
            # F.dropout defaults to training=True; tie it to the module mode
            # so eval()/inference stays deterministic.
            x = F.dropout(input=x, p=self.moe_dropout_prob, training=self.training)
        return self.down_proj(x)
+ + Args: + config: Model configuration containing MoE parameters + layer_idx: Index of this layer in the model + group: Distributed communication group + gate_weight: Optional pre-existing gate weight tensor + """ + super().__init__() + self.config = config + + self.model_dim = config.hidden_size + self.num_experts = config.moe_num_experts + self.num_experts_tensor = ( + sum(config.moe_num_experts) + if config.multimodel_experts + else config.moe_num_experts + ) + + self.cap = config.moe_capacity + self.group = group + + self.layer_idx = layer_idx + + self.sinkhorn_2gate = config.sinkhorn_2gate + self.sinkhorn_temp = config.sinkhorn_temp + self.use_correction_bias = config.moe_use_aux_free # true + self.use_token_type_bias = config.get("moe_use_token_type_bias", False) + + self.act = partial(F.softmax, dim=-1) # [S,E] + + self.no_jitter = True + self.expert_drop = False + self.eye_matrix = None + self.eye_matrix_size = None + self.norm_gate_logits = config.moe_norm_gate_logits # true + self.one = torch.ones([], dtype=torch.float32) + + self.moe_aux_loss_lambda = torch.tensor(config.moe_aux_loss_lambda).to( + dtype=torch.float32 + ) + self.moe_z_loss_lambda = torch.tensor(config.moe_z_loss_lambda).to( + dtype=torch.float32 + ) + self.moe_orthogonal_loss_lambda = torch.tensor( + config.moe_orthogonal_loss_lambda + ).to(dtype=torch.float32) + + if self.moe_aux_loss_lambda.ndim == 0: + self.moe_aux_loss_lambda = self.moe_aux_loss_lambda.unsqueeze(0) + if self.moe_z_loss_lambda.ndim == 0: + self.moe_z_loss_lambda = self.moe_z_loss_lambda.unsqueeze(0) + if self.moe_orthogonal_loss_lambda.ndim == 0: + self.moe_orthogonal_loss_lambda = self.moe_orthogonal_loss_lambda.unsqueeze( + 0 + ) + + self.experts_type_ids = None + + self.eps = torch.tensor([1e-12]).to(dtype=torch.float32) + if config.multimodel_experts: + if config.get("moe_use_hard_gate", False): + self.num_experts_list = [] + self.experts_type_mask = [] + # hard-gate + group_experts 需要对gate_logits不同部分分开计算 + 
experts_ids = torch.zeros( + [sum(self.num_experts)], dtype=torch.int64 + ).reshape((1, -1)) + offset = 0 + for i, expert_num in enumerate(self.num_experts): + experts_ids[:, offset : offset + expert_num] = i + offset += expert_num + self.experts_type_ids = experts_ids.reshape([-1]) + logger.info( + f"use moe_use_hard_gate, experts_ids: {self.experts_type_ids}" + ) + for i, expert_num in enumerate(self.num_experts): + self.experts_type_mask.append( + self.experts_type_ids == i, + ) + self.num_experts_list.append(expert_num) + else: + # 非group_experts, 依赖token_type_bias实现hard-gate能力。 + assert ( + not config.moe_group_experts + ), "group_experts must use hard_gate when multimodel_experts is True" + else: + self.num_experts_list = [self.num_experts] + + if gate_weight is not None: + self.weight = gate_weight + + assert ( + not self.config.moe_use_token_type_bias + ), "gate_weights is from outside, token_type_bias can't be used" + logger.info("moe use gate_weight from outside") + # use fp32 pecison in amp + self._cast_to_low_precision = False + self._cast_to_low_precison = False + else: + self._create_gate_parameter() + logger.info( + f"{config.moe_gate}: w/ capacity: {self.cap} experts:{self.num_experts} " + f"use_token_type_bias:{self.use_token_type_bias} " + f"gate_act:{config.moe_gate_act} " + f"norm_gate_logits={self.norm_gate_logits} use_correction_bias={self.use_correction_bias}" + ) + + def _create_gate_parameter(self): + """ + Create gate weight parameter. 
def get_gate_weight(self, transform_weight, is_multimodel=True):
    """Return the gate projection weight, merging per-modality weights when needed.

    Args:
        transform_weight (bool): Accepted for interface compatibility; this
            method does not read it.
        is_multimodel (bool): When False, only the first weight is returned.

    Returns:
        Tensor: ``self.weight`` when ``is_multimodel`` is False or the config
        does not enable ``multimodel_experts``; otherwise ``weight``,
        ``weight_1``, ... (one per entry of ``self.num_experts``) concatenated
        along the last dimension.
    """
    merge_modalities = is_multimodel and self.config.multimodel_experts
    if not merge_modalities:
        return self.weight

    per_modality = []
    for group_idx in range(len(self.num_experts)):
        # The first group keeps the plain name; later groups are suffixed.
        attr_name = f"weight_{group_idx}" if group_idx else "weight"
        per_modality.append(getattr(self, attr_name))
    return torch.cat(per_modality, -1)
def get_capacity(self, num_tokens, cap_factor=None, is_multimodel=True):
    """Compute the per-expert token capacity for a batch of ``num_tokens`` tokens.

    Args:
        num_tokens (int): Number of input tokens.
        cap_factor: Optional capacity factor; when given it replaces the
            factor picked from ``self.cap``.
        is_multimodel (bool): When True and the config enables
            ``multimodel_experts``, the experts of all groups are counted;
            otherwise only the first group (or the scalar count) is used.

    Returns:
        int: ``int(cap * num_tokens // num_experts)``.

    Raises:
        AssertionError: If the resulting capacity is not positive.
    """
    if is_multimodel and self.config.multimodel_experts:
        # ``num_experts_list`` is only created on the hard-gate / single-modality
        # paths of ``__init__``; fall back to ``num_experts`` (the per-group list
        # in the multimodal case) so this branch cannot hit an AttributeError.
        num_experts = sum(getattr(self, "num_experts_list", self.num_experts))
    elif isinstance(self.num_experts, (list, tuple)):
        num_experts = self.num_experts[0]
    else:
        num_experts = self.num_experts

    if cap_factor is not None:
        cap = cap_factor
    elif self.training:
        cap = self.cap[0]
    elif num_tokens < num_experts:  # fewer tokens than experts
        cap = self.cap[2]
    else:
        cap = self.cap[1]

    capacity = int(cap * num_tokens // num_experts)
    assert capacity > 0, (
        f"requires capacity > 0. cap={cap}, num_tokens={num_tokens}, "
        f"num_experts={num_experts}"
    )
    return capacity
+ + Args: + logits: Input logits tensor + cap: Optional capacity override + correction_bias: Bias tensor for correction + + Returns: + tuple: (capacity, dispatch_masks, combine_weights, scatter_indexes, loss_aux, loss_z) + + Note: + capacity: The maximum number that each token can be dispatched. + dispatch_masks: Masks used for dispatching. The first element is the mask for the first + type of tokens; the second element is the mask for the second type of tokens. + combine_weights: Weights used for combining. The first element is the weight for the first + type of tokens; the second element is the weight for the second type of tokens. + scatter_indexes: Indexes used for scattering. The first element is the index for the first + type of tokens; the second element is the index for the second type of tokens. + loss_aux: Auxiliary loss. + loss_z: Z loss. + """ + gates = self.act(logits) + + # gates has shape of SE + assert logits.ndim == 2, logits.shape + num_tokens = gates.shape[0] + num_experts = gates.shape[1] + # capacity = 2S/E + capacity = self.get_capacity(logits.shape[0], cap) + current_device = logits.device + + # Create a mask for 1st's expert per token + score_for_argmax = ( + gates + correction_bias.unsqueeze(0) + if correction_bias is not None + else gates + ) + indices1_s = torch.argmax(score_for_argmax, dim=1) + mask1 = F.one_hot(indices1_s, num_classes=num_experts).to( + dtype=torch.int64, device=current_device + ) # [0,1] + + # Create a mask for 2nd's expert per token using Gumbel-max trick + # https://timvieira.github.io/blog/post/2014/07/31/gumbel-max-trick/ + if self.training and not self.no_jitter: + gumbels = ( + -torch.empty_like( + logits, + device=current_device, + ) + .exponential_() + .log() + ) # ~Gumbel(0,1) + logits_w_noise = logits + gumbels + else: + logits_w_noise = logits + + logits_except1 = masked_fill( + logits_w_noise, + mask1.to(dtype=torch.bool, device=current_device), + float("-inf"), + ) + score_for_argmax = ( + 
self.act(logits_except1) + correction_bias.unsqueeze(0) + if correction_bias is not None + else logits_except1 + ) + indices2_s_original = torch.argmax(score_for_argmax, dim=1) + + if self.training and self.sinkhorn_2gate: + r = ( + torch.ones(num_tokens, dtype=torch.float32, device=current_device) + / num_tokens + ) + c_mask_sum = mask1.to(dtype=torch.float32, device=current_device).sum(0) + c = capacity - c_mask_sum + c = torch.maximum(c, torch.zeros_like(c, device=current_device)) + c_sum = c.sum() + if c_sum > 0: + c = c / c_sum + else: # Avoid division by zero if all experts are full from top-1 + c = torch.ones_like(c, device=current_device) / num_experts + + pi, _ = compute_optimal_transport( + -logits_except1.to(dtype=torch.float32, device=current_device).detach(), + r, + c, + lam=self.sinkhorn_temp, + ) + pi = masked_fill( + pi, mask1.to(dtype=torch.bool, device=current_device), float("-inf") + ) + indices2_s = torch.argmax(pi, dim=1) + else: + indices2_s = indices2_s_original + + mask2 = F.one_hot(indices2_s, num_classes=self.num_experts).to( + dtype=torch.int64, device=current_device + ) + + # Compute locations in capacity buffer + locations1 = ( + torch.cumsum(mask1, dim=0) - 1 + ) # [0,1,1,0,1,0,0] -> [0,0,0,0,1,1,1,] + locations2 = torch.cumsum(mask2, dim=0) - 1 + # Update 2nd's location by accounting for locations of 1st + locations2 += torch.sum(mask1, dim=0, keepdim=True) + + # Remove locations outside capacity from mask + mask1 = mask1 * (locations1 < capacity).to( + dtype=torch.int64, device=current_device + ) # [0,1,1,0,0,0,0] + mask2 = mask2 * (locations2 < capacity).to( + dtype=torch.int64, device=current_device + ) + + # Store the capacity location for each token + locations1_s = torch.sum(locations1 * mask1, dim=1) + locations2_s = torch.sum(locations2 * mask2, dim=1) + + # Normalize gate probabilities + mask1_float = mask1.to(dtype=torch.float32, device=current_device) + mask2_float = mask2.to(dtype=torch.float32, device=current_device) + 
def _cal_orthogonal_loss_opt_each_weight(self, weight, use_group):
    """Compute the orthogonality penalty of one (row-normalised) weight matrix.

    Each row of ``weight`` is divided by its L2 norm (clamped from below by
    ``self.eps``); the loss is ``_squared_l2_norm(W @ W^T - I)`` divided by
    the product of the first two dimensions of that difference.

    Args:
        weight (Tensor): 2-D weight tensor; cast to float32 if necessary.
        use_group (bool): When True the rows are reshaped into
            ``self.config.moe_k`` groups and the Gram matrix is computed per
            group.

    Returns:
        Tensor: Scalar orthogonal loss.
    """
    if weight.dtype != torch.float32:
        weight = weight.to(torch.float32)

    wnorm = torch.norm(weight, p=2, dim=1)
    weight = weight / torch.maximum(wnorm, self.eps.to(weight.device)).unsqueeze(1)

    if use_group:
        weight = weight.reshape(
            [self.config.moe_k, -1, weight.shape[1]]
        )  # [K, E/K, H]
        eye_matrix = torch.eye(
            weight.shape[1], dtype=weight.dtype, device=weight.device
        ).unsqueeze(0)
    else:
        eye_matrix = torch.eye(
            weight.shape[0], dtype=weight.dtype, device=weight.device
        )

    # Transpose only the last two dims: ``Tensor.T`` on the 3-D grouped weight
    # would reverse every dimension and break the batched product. For the 2-D
    # case this is the same as ``weight.T``.
    weight_matmul = torch.matmul(weight, weight.transpose(-1, -2))

    orthogonal_loss = weight_matmul - eye_matrix
    orthogonal_loss = _squared_l2_norm(orthogonal_loss) / (
        orthogonal_loss.size(0) * orthogonal_loss.size(1)
    )
    return orthogonal_loss
def get_gate(
    config: Ernie4_5_MoEConfig,
    expert: nn.Module,
    layer_idx: int,
) -> Tuple[nn.Module, nn.ModuleList, Optional[nn.Module], Optional[nn.ModuleList]]:
    """Build the gate layer and the expert list for one MoE layer.

    Args:
        config (Ernie4_5_MoEConfig): Configuration for MoE architecture.
        expert: Iterable of ``(experts_num, fc)`` pairs, one per expert group.
            ``fc`` is either a single prototype module (deep-copied
            ``experts_num - 1`` times) or a sized collection of ready-made
            experts.
        layer_idx (int): Index of current layer in transformer stack.

    Returns:
        tuple: ``(gate, experts, lm_gate, lm_experts)``.
            - gate: Gate layer selected through ``config.moe_gate``.
            - experts: ``nn.ModuleList`` with the experts of every group.
            - lm_gate / lm_experts: With ``multimodel_experts`` and
              ``moe_use_hard_gate`` enabled these are the gate and either the
              first group's experts (more than two experts in total) or all
              experts; otherwise both are None.

    Raises:
        AssertionError: If the number of collected experts differs from the
            configured total.
    """
    moe_num_experts = (
        sum(config.moe_num_experts)
        if config.multimodel_experts
        else config.moe_num_experts
    )
    experts = nn.ModuleList([])

    for expert_id, (experts_num, fc) in enumerate(expert):
        if not hasattr(fc, "__len__"):
            # Single prototype: keep it and clone it for the remaining slots.
            experts_to_append = [fc]
            if expert_id == 1:
                with UniqueNameGuard("_mm_deepcopy"):
                    for _ in range(experts_num - 1):
                        experts_to_append.append(deepcopy(fc))
            else:
                for _ in range(experts_num - 1):
                    experts_to_append.append(deepcopy(fc))
        else:
            experts_to_append = fc

        # Different `expert_type` can have different intermediate-size
        expert_type = f"expert_type_{expert_id}"
        for ex in experts_to_append:
            for p in ex.parameters():
                p.expert_type = expert_type

        # The original guarded this with `i // experts_num == 0`, which is
        # always true inside range(experts_num); every slot gets an expert.
        for index in range(experts_num):
            experts.append(experts_to_append[index])

    assert (
        len(experts) == moe_num_experts
    ), f"experts.len={len(experts)} != moe_num_experts={moe_num_experts}"
    logger.info(f"MOE-GATE:-{config.moe_gate}")

    gate = gate_class[config.moe_gate.lower()](config, layer_idx=layer_idx)

    if config.multimodel_experts and config.moe_use_hard_gate and moe_num_experts > 2:
        lm_experts = experts[: config.moe_num_experts[0]]
        lm_gate = gate
    elif config.multimodel_experts and config.moe_use_hard_gate:
        lm_gate, lm_experts = gate, experts
    else:
        lm_gate, lm_experts = None, None

    logger.info(f"LM-experts-{lm_experts} -- experts-{experts}")

    return gate, experts, lm_gate, lm_experts
def dispatching(x, dispatch_mask, scatter_index, num_experts, capacity):
    """Scatter tokens into a flat per-expert buffer according to the gate result.

    Every column of ``dispatch_mask`` / ``scatter_index`` describes one
    routing choice per token. Tokens whose mask entry is zero contribute a
    zero row, so they leave the buffer unchanged.

    Args:
        x (Tensor): Input tensor of shape [Seq, Dim]
        dispatch_mask (Tensor): Dispatching mask of shape [Seq, K]
        scatter_index (Tensor): Scatter indices of shape [Seq, K]; each entry
            is a row index into the output buffer
        num_experts (int): Number of experts
        capacity (int): Capacity per expert

    Returns:
        Tensor: Dispatched output tensor of shape [Expert*Capacity, Dim] with
        the dtype and device of ``x``.
    """
    dim = x.shape[-1]
    output = torch.zeros(num_experts * capacity, dim, dtype=x.dtype, device=x.device)

    # One pass per routing column; works for any K instead of exactly two.
    for route_index, route_mask in zip(scatter_index.unbind(1), dispatch_mask.unbind(1)):
        updates = x * route_mask.unsqueeze(-1).to(x.dtype)  # [seq, dim]
        # scatter_add needs int64 indices broadcast over the feature dim.
        index = route_index.to(torch.int64).unsqueeze(-1).expand(-1, dim)  # [seq, dim]
        output = output.scatter_add(0, index, updates)
    return output
def __init__(
    self,
    gate: nn.Module,
    experts: List[nn.Module],
    layer_idx: int,
    shared_experts: Optional[List[nn.Module]] = None,
    group=None,
    recompute: bool = False,
    k: int = 2,
    all_to_all_dropout: float = 0,
    group_experts: bool = False,
    moe_statics=None,
    moe_num_experts=None,
):
    """
    Initialize MoE layer.

    Args:
        gate: Gate network for expert selection
        experts: Expert networks, either an ``nn.ModuleList`` or a fused
            expert object
        layer_idx: Index of this layer in the model
        shared_experts: Optional shared experts stored on the layer
        group: Distributed communication group
        recompute: Accepted for interface compatibility; not read here
        k: Number of experts to select per token
        all_to_all_dropout: Dropout rate for all-to-all communication
        group_experts: Whether to group experts
        moe_statics: MoE statistics tracking object; when given, the
            correction bias is enabled
        moe_num_experts: Expert count, or a list/tuple of per-modality counts;
            more than one entry enables the multimodal expert layout
    """
    super().__init__()
    self.gate = gate
    self.layer_idx = layer_idx

    if not isinstance(experts, nn.ModuleList):
        logger.info(f"using fused experts, type={type(experts)}")
    self.experts = experts
    self.shared_experts = shared_experts

    self.group = group
    self.k = k
    self.all_to_all_dropout = all_to_all_dropout
    self.use_correction_bias = moe_statics is not None
    self.moe_statics = moe_statics
    if self.use_correction_bias:
        logger.info(
            f"using correction bias, aux-coef:{self.gate.config.moe_aux_loss_lambda}"
        )
        assert self.gate.config.moe_use_aux_free

    # Ask torch.distributed only when a process group exists, instead of
    # calling it blindly under a bare ``except:`` (which would also swallow
    # KeyboardInterrupt / SystemExit). Negative values are clamped as before.
    dist = torch.distributed
    if dist.is_available() and dist.is_initialized():
        self.world_size = max(dist.get_world_size(), 1)
        self.rank = max(dist.get_rank(), 0)
    else:
        self.world_size = 1
        self.rank = 0

    self.multimodal_experts = (
        isinstance(moe_num_experts, (tuple, list)) and len(moe_num_experts) > 1
    )
    self.num_local_experts = len(self.experts) // self.world_size
    if self.multimodal_experts:
        self.num_local_multimodal_experts = [
            num // self.world_size for num in moe_num_experts
        ]
        # Prefix sums: group i owns experts [index[i], index[i + 1]).
        self.multimodal_expert_index = [0] + list(
            itertools.accumulate(moe_num_experts)
        )

    self.input_preprocess = self.output_postprocess = None
    self.group_experts = group_experts
    self.config = self.gate.config
    self.zero = torch.tensor(0).to(dtype=torch.float32)
+ + Args: + dispatched_input: Input tensor of shape [num_experts, capacity, dim] + + Returns: + Tensor: Expert outputs of shape [num_experts, capacity, dim] + """ + + if not self.multimodal_experts: + true_experts = self.experts[ + self.rank + * self.num_local_experts : (self.rank + 1) + * self.num_local_experts + ] + else: + true_experts = [] + for i, num in enumerate(self.num_local_multimodal_experts): + current_modal_experts = self.experts[ + self.multimodal_expert_index[i] : self.multimodal_expert_index[ + i + 1 + ] + ] + true_experts.extend( + current_modal_experts[self.rank * num : (self.rank + 1) * num] + ) + + dispatched_input = dispatched_input.reshape( + [self.world_size, self.num_local_experts, -1, dispatched_input.shape[-1]] + ) + current_device = dispatched_input.device + expert_outputs = [] + if isinstance(self.experts, nn.ModuleList): + chunks = dispatched_input.permute(1, 0, 2, 3).contiguous().unbind(0) + assert len(chunks) == len( + true_experts + ), f"{len(chunks)}, {len(true_experts)}" + for chunk, expert in zip(chunks, true_experts): + expert_outputs.append(expert(chunk)) + else: + dispatched_input = dispatched_input.permute(1, 0, 2, 3).contiguous() + orig_shape = dispatched_input.shape + chunks = dispatched_input.reshape(orig_shape[0], -1, orig_shape[-1]) + chunks = self.experts(chunks) + chunks = chunks.reshape(orig_shape[:-1] + (chunks.shape[-1],)).unbind(0) + expert_outputs.extend(chunks) + + for i, expert_output in enumerate(expert_outputs): + expert_outputs[i] = expert_output.to(current_device) + expert_output = torch.stack(expert_outputs, dim=1) + return expert_output + + def moe_gate_dispatch( + self, + x: torch.Tensor, # [S, H] float16 / float32 / bfloat16 + gate_logits: torch.Tensor, # [S, E] float32 + k: int, + capacity: Optional[int], + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """dispatch input to experts based on gate logits""" + + S, H = x.shape + E = gate_logits.shape[1] + device = 
x.device + topk_prob, topk_idx = torch.topk(gate_logits, k, dim=-1) # [S, k] + combine_weights = topk_prob # [S, k] + expert_id = topk_idx # [S, k] + y = x.new_zeros((E, capacity, H)) # [E, C, H] + scatter_index = x.new_full((k, S), -1, dtype=torch.int32) # [k, S] + # per-expert slot counters + slot_counter = torch.zeros(E, dtype=torch.int32, device=device) + + for tok in range(S): + for route in range(k): + e = expert_id[tok, route].item() + slot = slot_counter[e].item() + if slot >= capacity: # expert is full -> drop + combine_weights[tok, route] = 0.0 + continue + # record mapping & dispatch activation + scatter_index[route, tok] = e * capacity + slot + y[e, slot] = x[tok] + slot_counter[e] += 1 + + expert_offset = torch.cumsum(slot_counter, 0, dtype=torch.int64) + + return y, combine_weights, scatter_index, expert_offset, expert_id + + def gate_and_dispatch(self, input, token_type_ids=None, is_multimodel=True): + """ + Calculate gate and dispatch inputs. + + Args: + input: Input tensor of shape [seq, dim] + + Returns: + tuple: (dispatched_input, combine_weights, dispatch_mask, + scatter_index, router_loss, gate_logits, gate_prob) + """ + d_model = input.shape[1] + if isinstance(self.gate, (TopKGate)): + capacity = self.gate.get_capacity( + input.shape[0], is_multimodel=is_multimodel + ) + if token_type_ids is not None: + token_type_ids = token_type_ids.reshape([-1]) + gate_logits = self.gate( + input, token_type_ids=token_type_ids, is_multimodel=is_multimodel + ) + prob = self.gate.act(gate_logits) + ( + dispatched_input, + combine_weights_unnorm, + scatter_index, + dispatch_mask, + _, + ) = self.moe_gate_dispatch(input, prob, k=self.k, capacity=capacity) + dispatch_mask = torch.diff(F.pad(dispatch_mask, (1, 0))) + + scatter_index.detach() + dispatch_mask.detach() + + scatter_index = scatter_index.transpose(0, 1) # [k, s] -> [s, k] + combine_weights = combine_weights_unnorm / torch.clamp( + combine_weights_unnorm.sum(dim=-1, keepdim=True), min=1e-12 + ) + 
def combine_expert_output(self, expert_output, combine_weights, scatter_index):
    """
    Merge expert outputs back into token order.

    The expert output is flattened to two dimensions (keeping the last one),
    passed to ``combining`` together with the weights and indices, and then
    run through ``self.output_postprocess`` when one is set.

    Args:
        expert_output: Expert outputs [num_experts, capacity, dim]
        combine_weights: Combination weights
        scatter_index: Scatter indices

    Returns:
        Tensor: Combined output [seqlen, dim]
    """
    flat_output = expert_output.reshape([-1, expert_output.shape[-1]])
    merged = combining(flat_output, combine_weights, scatter_index)

    postprocess = self.output_postprocess
    if postprocess is None:
        return merged
    return postprocess(merged)
def __init__(
    self,
    gate: nn.Module,
    experts: List[nn.Module],
    layer_idx,
    shared_experts: Optional[List[nn.Module]] = None,
    group=None,
    recompute=False,
    k=2,
    enable_reverse_token_drop=False,
    all_to_all_dropout=0,
    group_experts=False,
    use_expert_out_alltoall=True,
    use_expert_alltoall_overlap=False,
    use_padding=True,
    dense_token_type=3,
    moe_statics=None,
    moe_num_experts=None,
):
    """
    Initialize the allgather MoE layer.

    The arguments shared with the parent layer are forwarded to its
    constructor; the remaining ones are stored on the instance or only
    reported in the log line.

    Args:
        enable_reverse_token_drop: Stored on the instance
        use_expert_out_alltoall: Only logged here
        use_expert_alltoall_overlap: Only logged here
        use_padding: Stored on the instance
        dense_token_type: Token type id treated as dense (no MoE) tokens
    """
    super().__init__(
        gate=gate,
        experts=experts,
        layer_idx=layer_idx,
        shared_experts=shared_experts,
        group=group,
        recompute=recompute,
        k=k,
        all_to_all_dropout=all_to_all_dropout,
        group_experts=group_experts,
        moe_statics=moe_statics,
        moe_num_experts=moe_num_experts,
    )
    self.enable_reverse_token_drop = enable_reverse_token_drop
    self.is_allgather_moe_layer = True
    self.use_padding = use_padding

    # Caches that start empty.
    self.send_rank = None
    self.local_expert_id = None
    self.dense_experts = None
    self.capacity_tensor = None
    self.dense_token_type = dense_token_type

    summary = (
        f"uisng MOEAllGatherLayerV2, use_expert_out_alltoall={use_expert_out_alltoall}, "
        f"use_padding={use_padding}, use_expert_alltoall_overlap={use_expert_alltoall_overlap} "
        f"enable_reverse_token_drop={self.enable_reverse_token_drop}"
    )
    logger.info(summary)

    # Float32 scalar constants kept as tensors.
    self.two = torch.tensor(2).to(dtype=torch.float32)
    self.zero = torch.tensor(0).to(dtype=torch.float32)
Enables communication-computation overlap through asynchronous operations + + Args: + input (Tensor): Input tensor of shape [seq_len, hidden_dim] + token_type_ids: Optional segmentation markers for heterogeneous inputs + use_dense_expert: Flag to enable dense expert computation bypass + + Returns: + tuple: ( + combined_output: Aggregated expert outputs [seq_len, hidden_dim], + combine_weights: Expert combination coefficients, + ) + """ + use_fuse = isinstance(self.gate, (TopKGate)) + assert use_fuse + if input.ndim == 3: + orig_shape = input.shape + input = input.reshape([-1, input.shape[-1]]) + else: + orig_shape = None + + assert ( + len(input.shape) == 2 + ), f"input Tensor must have dimensions: (s)equence, (d)im, got:{input.shape}" + dispatch_token_type_ids = None + global_dense_expert_mask = None + if token_type_ids is not None: + token_type_ids = token_type_ids[:, :-1].reshape([-1]) + dispatch_token_type_ids = token_type_ids + if use_dense_expert: + global_dense_expert_mask = ( + dispatch_token_type_ids == self.dense_token_type + ) + + assert self.gate is not None + + ( + dispatched_input, + global_hidden_states, + local_combine_weights, + expert_num_global_no_token_drop, + expert_num_global, + expert_num_global_list, + local_scatter_index, + scatter_index_rev, + router_loss, + (gate_logits, gate_prob), + (gate_logits_mm, gate_prob_mm), + expert_num_local, + ) = self.fused_gate_and_dispatch( + input, token_type_ids, global_dense_expert_mask + ) + + seqlen_this_mp = input.shape[0] + if len(scatter_index_rev): + recv_rank_local = scatter_index_rev // seqlen_this_mp + else: + recv_rank_local = scatter_index_rev + + if self.send_rank is None: + capacity = self.gate.get_capacity(input.shape[0]) + self.send_rank = ( + torch.arange(1) + .repeat_interleave(capacity * self.num_local_experts) + .to(torch.int32) # cap + ) + self.local_expert_id = ( + torch.arange(self.num_local_experts) + .repeat_interleave(capacity) + .repeat(1) + .to(self.send_rank.dtype) + ) + 
send_rank = self.send_rank + local_expert_id = self.local_expert_id + + expert_outs = self.forward_experts(*dispatched_input) + for e in expert_outs: + if e is not None: + current_device = e.device + break + expert_outs = torch.cat( + [e.to(current_device) for e in expert_outs if e is not None], dim=0 + ) # [e*c,m] + + # global -> local + combined_output = self.combine_expert_output( + expert_outs, local_combine_weights, local_scatter_index + ) + + if self.shared_experts is not None: + shared_out = self.shared_experts(input).to(combined_output.device) + combined_output += shared_out + + if orig_shape: + combined_output = combined_output.reshape( + *orig_shape[:-1], combined_output.shape[-1] + ) + + return combined_output, local_combine_weights, None, gate_logits + + def _expand_modality_expert_id( + self, + expert_id: torch.Tensor, # (seqlen, k) + seqlen: int, + k: int, + num_expert_per_modality: int, + group_size: int, + modality_offset: int, + is_group_expert: bool, + ) -> torch.Tensor: + """ + expert_id: tensor of shape (seqlen, k), containing expert ids + Returns: tensor of same shape, with updated expert ids + """ + device = expert_id.device + expert_id = expert_id.clone() + + if is_group_expert: + # idx % k * group_size + offsets = (torch.arange(k, device=device) * group_size).view( + 1, k + ) # shape (1, k) + expert_id += offsets + + if num_expert_per_modality <= 0: + return expert_id + + # Compute rank and local expert id + rank = expert_id // num_expert_per_modality + expert_id_in_rank = expert_id % num_expert_per_modality + + # Compute new expert id with modality-aware adjustment + expert_id_out = ( + rank * (num_expert_per_modality * 2) # 2 modalities assumed + + expert_id_in_rank + + modality_offset * num_expert_per_modality + ) + + return expert_id_out + + def expand_modality_expert_id( + self, + expert_id, + num_expert_per_modality, + group_size, + modality_offset, + is_group_expert, + ): + """expand expert id for modality aware moe layer""" + 
seq_len, k = expert_id.shape + + return self._expand_modality_expert_id( + expert_id, + seq_len, + k, + num_expert_per_modality, + group_size, + modality_offset, + is_group_expert, + ) + + def fused_gate_logits_process_fused( + self, gate_logits_lm, gate_logits_mm=None, token_type_ids=None + ): + """Process gating logits for expert selection in Mixture-of-Experts (MoE) layers. + + Core Functionality: + - Transforms raw gating logits into expert selection weights and IDs + - Supports both grouped and standard expert selection modes + - Handles bias correction for improved expert load balancing + + Args: + gate_logits_lm (Tensor): Raw gating scores of shape [batch_size, total_experts] + + Returns: + tuple: ( + lm_weight_and_expert_id: Combined tensor containing selection weights + and expert IDs [batch_size, 2*top_k], + prob_flat: Flattened expert probabilities [batch_size, total_experts] + ) + """ + top_k = self.k + num_expert_per_rank_per_modality = gate_logits_lm.shape[-1] + group_size = gate_logits_lm.shape[-1] // top_k + if self.group_experts: + assert not self.use_correction_bias + gate_logits_lm = gate_logits_lm.reshape( + [gate_logits_lm.shape[0], top_k, -1] + ) + prob_lm = self.gate.act(gate_logits_lm) + prob_lm_ = prob_lm + weight_lm, expert_id_lm = prob_lm_.topk(k=1, dim=-1) + weight_lm = weight_lm.reshape([gate_logits_lm.shape[0], -1]) + group_size = gate_logits_lm.shape[-1] + expert_id_lm = expert_id_lm.squeeze(-1) + else: + prob_lm = self.gate.act(gate_logits_lm) + if self.use_correction_bias: + prob_lm_ = prob_lm + self.moe_statics.e_score_correction_bias[ + 0 + ].detach().to(prob_lm.device) + else: + prob_lm_ = prob_lm + weight_lm, expert_id_lm = prob_lm_.topk(k=top_k, dim=-1) + + if self.use_correction_bias: + batch_idx = ( + torch.arange(prob_lm_.shape[0]).unsqueeze(-1).expand_as(expert_id_lm) + ) + weight_lm = prob_lm[batch_idx, expert_id_lm] # use correct bias + + expert_id_lm = self.expand_modality_expert_id( + expert_id_lm, + 
def moe_gate_dispatch_partial_nosoftmaxtopk(
    self,
    x,
    combine_weights,
    expert_id,
    k,
    num_experts,
):
    """Group token rows by their assigned expert and build the dispatch indices.

    Every (token, slot) assignment in ``expert_id`` is stably sorted by expert
    id, so assignments to the same expert end up contiguous and keep their
    original relative order.

    Args:
        x: Tensor of shape [num_rows, hidden_size] holding the token rows.
        combine_weights: Routing weights, [num_rows, k]; returned as a clone.
        expert_id: Integer tensor of shape [num_rows, k] with the expert
            chosen for every (token, slot) pair.
        k: Ignored. The value is always re-derived from ``expert_id.shape[1]``;
            the parameter is kept so existing callers keep working.
        num_experts: Number of expert bins to count. Ids outside
            ``[0, num_experts)`` are not counted.

    Returns:
        tuple:
            y: ``x`` gathered in expert-sorted order, one row per
                (token, slot) assignment.
            combine_weights_out: Clone of ``combine_weights``.
            scatter_index: int32 tensor [k, num_rows]; entry ``[slot, token]``
                is the row of ``y`` that assignment was placed at, or -1 when
                the assignment falls beyond the counted total.
            scatter_index_rev: For every row of ``y``, the source row in ``x``.
            expert_nums_local: int64 tensor [num_experts] with the number of
                assignments per expert (returned twice; the caller unpacks it
                as both the global and the local count).
    """
    device = x.device
    num_rows = x.shape[0]
    k = expert_id.shape[1]
    expert_ids_flat = expert_id.reshape(-1)  # [num_rows * k]

    sorted_expert_ids, sorted_indices = torch.sort(expert_ids_flat, stable=True)
    sorted_indices = sorted_indices.to(device)

    # Histogram of assignments per expert in a single kernel instead of one
    # comparison + host sync (`.item()`) per expert. Out-of-range ids are
    # filtered first so the result has exactly `num_experts` bins.
    in_range = (sorted_expert_ids >= 0) & (sorted_expert_ids < num_experts)
    expert_nums_local = torch.bincount(
        sorted_expert_ids[in_range].to(torch.int64), minlength=num_experts
    ).to(device)

    # `sum()` also works when `num_experts == 0`, where indexing the last
    # cumsum element would raise.
    total_dispatched_tokens = int(expert_nums_local.sum())

    # Flat assignment index i encodes (token, slot) as (i // k, i % k).
    source_rows = sorted_indices // k
    y = x[source_rows]  # [num_rows * k, hidden_size]

    scatter_index = torch.full((k, num_rows), -1, dtype=torch.int32, device=device)
    # Only the first `total_dispatched_tokens` sorted assignments get a
    # position; the rest keep the -1 sentinel. `sorted_indices` is a
    # permutation, so the (slot, token) targets are unique and one vectorised
    # write is equivalent to assigning them one by one.
    dispatched = sorted_indices[:total_dispatched_tokens]
    scatter_index[dispatched % k, dispatched // k] = torch.arange(
        total_dispatched_tokens, dtype=torch.int32, device=device
    )

    scatter_index_rev = source_rows
    combine_weights_out = combine_weights.clone()

    return (
        y,  # [num_rows * k, hidden_size]
        combine_weights_out,  # [num_rows, k]
        scatter_index,  # [k, num_rows]
        scatter_index_rev,  # [num_rows * k]
        expert_nums_local,  # [num_experts]
        expert_nums_local,  # [num_experts]
    )
+ + Core Functionality: + - Computes expert selection probabilities and routing weights + - Performs distributed token-to-expert assignment + - Handles communication and synchronization in model-parallel environments + + Args: + input (Tensor): Input tensor of shape [seq_len, hidden_dim] + + Returns: + tuple: ( + dispatched_input: Expert-assigned tokens [num_experts, capacity, hidden_dim], + global_hidden_states: Full sequence representations, + local_combine_weights: Local expert combination weights, + expert_num_global_notrunc: Global expert token counts (without capacity truncation), + expert_num_global: Actual expert token counts, + expert_num_global_list: Per-expert token counts, + local_scatter_index: Local token reorganization indices, + scatter_index_rev: Reverse scattering indices, + router_loss: Calculated routing loss, + gate_outputs: Raw gating network outputs, + expert_num_local: Local expert utilization counts + ) + """ + seqlen, d_model = input.shape + args = () + if token_type_ids is not None: + token_type_ids = token_type_ids.reshape([-1]) + args = (token_type_ids,) + + router_loss = torch.zeros([1], dtype=torch.float32) + top_k = self.k + + def build_weights_and_expert_id(input): + nonlocal token_type_ids, args + logits = self.gate(input, *args, transform_weight=False) + if self.config.multimodel_experts: + gate_logits_lm, gate_logits_mm = logits.chunk(2, dim=-1) + else: + gate_logits_lm, gate_logits_mm = logits, None + + weigth_and_expert, gate_prob_lm, gate_prob_mm = ( + self.fused_gate_logits_process_fused( + gate_logits_lm, + gate_logits_mm, + token_type_ids if global_dense_expert_mask is None else None, + ) + ) + return ( + weigth_and_expert, + gate_logits_lm, + gate_logits_mm, + gate_prob_lm, + gate_prob_mm, + ) + + capacity = self.gate.get_capacity(input.shape[0]) * self.world_size + global_hidden_states = input + ( + combine_weights_and_expert_id, + gate_logits_lm, + gate_logits_mm, + gate_prob_lm, + gate_prob_mm, + ) = 
build_weights_and_expert_id(input) + + combine_weights_unnorm, expert_id = combine_weights_and_expert_id.chunk( + 2, dim=-1 + ) + expert_id = expert_id.to(torch.int32) + num_experts = ( + sum(self.config.moe_num_experts) + if isinstance(self.config.moe_num_experts, (tuple, list)) + else self.config.moe_num_experts + ) + if global_dense_expert_mask is not None: + combine_weights_unnorm[global_dense_expert_mask] = 0.0 + expert_id[global_dense_expert_mask] = num_experts + num_experts += 1 + + ( + dispatched_input, + combine_weights_unnorm, + scatter_index, # input -> dispatched_input + scatter_index_rev, # dispatch-input -> input + expert_num_global, + expert_num_local, + ) = self.moe_gate_dispatch_partial_nosoftmaxtopk( + global_hidden_states, + combine_weights_unnorm, + expert_id, + top_k, + num_experts, + ) + + if self.use_correction_bias: + if self.gate.config.multimodel_experts: + # MLLM + for i in range(len(self.moe_statics.expert_usage)): + self.moe_statics.expert_usage[i] += ( + expert_num_local[self.gate.experts_type_mask[i]] + .detach() + .to(self.moe_statics.expert_usage.device) + ) + else: + # LLM + self.moe_statics.expert_usage[0] += expert_num_local.detach().to( + self.moe_statics.expert_usage.device + ) + + # When use unpad , `moe_ops_partial` output likes `scatter_index_rev==[]`. 
+ if scatter_index_rev.ndim == 0: + assert not self.use_padding + scatter_index_rev = torch.empty([0], dtype=scatter_index_rev.dtype) + + expert_num_global_notrunc = expert_num_global + self.capacity_tensor = torch.tensor(capacity).to(dtype=expert_num_global.dtype) + expert_num_global = torch.minimum(expert_num_global, self.capacity_tensor) + + if global_dense_expert_mask is not None: + expert_num_global = expert_num_global[:-1] + expert_num_local = expert_num_local[:-1] + expert_num_global_notrunc = expert_num_global_notrunc[:-1] + + scatter_index = scatter_index.transpose(1, 0) # [k,s] ->[s,k] + scatter_index = scatter_index.to(combine_weights_unnorm.device) + + last_local_expert = 0 + expert_offset_global = expert_num_global.cumsum(-1) + + expert_num_global_list = expert_num_global + if self.use_padding: + offset = last_local_expert * capacity + else: + offset = 0 + local_combine_weights_unnorm = combine_weights_unnorm.contiguous() + local_scatter_index = torch.where( + combine_weights_unnorm > 0.0, + scatter_index + offset, + scatter_index, + ) + if self.gate.norm_gate_logits: + local_combine_weights = local_combine_weights_unnorm / torch.clip( + local_combine_weights_unnorm.sum(-1, keepdim=True), min=1e-12 + ) + else: + local_combine_weights = local_combine_weights_unnorm + local_combine_weights = local_combine_weights.to(dispatched_input.dtype) + if self.use_padding: + dispatched_input = dispatched_input.reshape( + [self.num_local_experts, -1, d_model] + ) + dispatched_input = dispatched_input.unbind(0) + else: + s = 0 + e = self.num_local_experts + expert_num_local = expert_num_local.tolist()[s:e] + expert_num_local_valid = [i for i in expert_num_local if i > 0] + valid_pos = [j for j, i in enumerate(expert_num_local) if i > 0] + if expert_num_local_valid: + dispatched_input_list = dispatched_input.split(expert_num_local_valid) + dispatched_input = [None] * len(expert_num_local) + for p, t in zip(valid_pos, dispatched_input_list): + dispatched_input[p] = t + 
def forward_experts(self, *dispatched_input):
    """Run each dispatched chunk through its local expert.

    The experts owned by this rank are the slice
    ``self.experts[rank * num_local_experts : (rank + 1) * num_local_experts]``;
    chunk ``i`` is fed to the ``i``-th expert of that slice.

    Args:
        *dispatched_input: One entry per local expert, in expert order. An
            entry is either the tensor of tokens routed to that expert or
            ``None`` when the expert received no tokens.

    Returns:
        list: One entry per input chunk; the expert's output tensor, or
        ``None`` for chunks that were ``None`` (those experts are not run).
    """
    assert isinstance(self.experts, nn.ModuleList), type(self.experts)

    first_local = self.rank * self.num_local_experts
    local_experts = self.experts[first_local : first_local + self.num_local_experts]

    # NOTE: the previous implementation carried a "dummy output" aggregation
    # branch guarded by a list that was never populated; it could not execute
    # and has been removed. Experts without tokens are simply skipped.
    expert_outputs = []
    for local_idx, chunk in enumerate(dispatched_input):
        if chunk is None:
            expert_outputs.append(None)
        else:
            expert_outputs.append(local_experts[local_idx](chunk.contiguous()))

    return expert_outputs
+ layer_idx (int): Index of this layer in the transformer stack + """ + super().__init__() + self.hidden_size = config.hidden_size + self.layer_idx = layer_idx + self.config = config + self.use_moe = config.use_moe + self.self_attn = Ernie4_5_Attention(config, layer_idx) + + moe_layer_start_index = ( + min(config.moe_layer_start_index) + if isinstance(config.moe_layer_start_index, (tuple, list)) + else config.moe_layer_start_index + ) + moe_layer_end_index = ( + max(config.moe_layer_end_index) + if isinstance(config.moe_layer_end_index, (tuple, list)) + else config.moe_layer_end_index + ) + + if ( + self.use_moe + and ((layer_idx + 1) % config.moe_layer_interval == 0) + and layer_idx >= moe_layer_start_index # 3 + and layer_idx <= moe_layer_end_index # 53 + ): + gate, experts, lm_gate, lm_experts, moe_statics = ( + self._init_gate_and_experts(layer_idx) + ) + shared_experts = ( + self._init_shared_experts() + if hasattr(config, "moe_num_shared_experts") + else None + ) + + dense_experts = None + moe_cls = MOELayer + if config.moe_multimodal_dispatch_use_allgather: # v2 + logger.info("Enable MOEAllGatherLayerV2!") + moe_cls = partial( + MOEAllGatherLayerV2, + use_expert_out_alltoall="alltoall" + in config.moe_multimodal_dispatch_use_allgather, # false + use_padding=False, + enable_reverse_token_drop=config.moe_reverse_token_drop, # false + dense_token_type=config.moe_dense_experts_token_type_id, # 3 + ) + else: + assert ( + dense_experts is None + ), "only `MOEAllGatherLayerV2` can process dense experts" + + self.mlp = moe_cls( + gate=gate, + experts=experts, + layer_idx=layer_idx, + shared_experts=shared_experts, + group=config.moe_group, + recompute=False, + k=config.moe_k, + all_to_all_dropout=config.moe_all_to_all_dropout, + group_experts=False, + moe_statics=moe_statics, + moe_num_experts=config.moe_num_experts, + ) + + _mlp_text = MOELayer( + gate=lm_gate, + experts=lm_experts, + layer_idx=layer_idx, + shared_experts=shared_experts, + group=config.moe_group, + 
recompute=False, + k=config.moe_k, + all_to_all_dropout=config.moe_all_to_all_dropout, + group_experts=False, + moe_statics=moe_statics, + moe_num_experts=config.moe_num_experts, + ) + self.mlp_text = ( + lambda: _mlp_text + ) # This lambda prevents the text parameter from being scanned into the state-dict + else: + self.mlp = Ernie4_5_MLP(config) + + Norm = RMSNorm + + self.input_layernorm = Norm(config) + self.post_attention_layernorm = Norm(config) + + self.residual_add1 = FusedDropoutImpl( + config.hidden_dropout_prob, mode="upscale_in_train" + ) + self.residual_add2 = FusedDropoutImpl( + config.hidden_dropout_prob, mode="upscale_in_train" + ) + + def _init_shared_experts(self): + """init shared experts + + Returns: + _type_: _description_ + """ + cfg = deepcopy(self.config) + if cfg.moe_num_shared_experts > 0: + if cfg.moe_intermediate_size: + inter_size = ( + next(iter(cfg.moe_intermediate_size)) + if isinstance(cfg.moe_intermediate_size, (tuple, list)) + else cfg.moe_intermediate_size + ) + cfg.intermediate_size = inter_size * cfg.moe_num_shared_experts + else: + cfg.intermediate_size = ( + cfg.intermediate_size * cfg.moe_num_shared_experts + ) + cfg.disable_ffn_model_parallel = False # split shared epxert + shared_experts = Ernie4_5_MoeMLP(cfg, True) + else: + shared_experts = None + return shared_experts + + def _init_gate_and_experts(self, layer_idx): + """Initialize MoE gate and expert networks. 
+ + Args: + layer_idx (int): Current layer index + + Returns: + Tuple: Contains: + - gate: MoE routing gate + - experts: List of expert networks + - moe_statics: Optional statistics tracker + """ + cfg = deepcopy(self.config) + fc_cls = Ernie4_5_MoeMLP + if cfg.moe_intermediate_size: + if isinstance(cfg.moe_intermediate_size, (tuple, list)): + assert isinstance(cfg.moe_num_experts, (tuple, list)) and len( + cfg.moe_num_experts + ) == len(cfg.moe_intermediate_size) + fc = [] + for _i, (num_experts, intermediate_size) in enumerate( + zip(cfg.moe_num_experts, cfg.moe_intermediate_size) + ): + ex_cfg = deepcopy(cfg) + ex_cfg.intermediate_size = intermediate_size + cur_modality_start_layer_idx = ( + cfg.moe_layer_start_index[_i] + if isinstance(cfg.moe_layer_start_index, (tuple, list)) + else cfg.moe_layer_start_index + ) + cur_modality_end_layer_idx = ( + cfg.moe_layer_end_index[_i] + if isinstance(cfg.moe_layer_end_index, (tuple, list)) + else cfg.moe_layer_end_index + ) + if ( + layer_idx >= cur_modality_start_layer_idx + and layer_idx <= cur_modality_end_layer_idx + ): + if _i == 1: + with UniqueNameGuard(f"mm_expert_{layer_idx}_") as guard: + fc.append((num_experts, fc_cls(ex_cfg))) + else: + fc.append((num_experts, fc_cls(ex_cfg))) + else: + logger.info( + f"moe multimodal experts use Identity layer_idx: {layer_idx}" + ) + fc.append((num_experts, nn.Identity())) + else: + cfg.intermediate_size = cfg.moe_intermediate_size + fc = [(cfg.moe_num_experts, fc_cls(cfg, layer_idx))] + else: + fc = [(cfg.moe_num_experts, fc_cls(cfg, layer_idx))] + if cfg.multimodel_experts: + gate, experts, lm_gate, lm_experts = get_gate(self.config, fc, layer_idx) + else: + gate, experts = get_gate(self.config, fc, layer_idx) + lm_gate, lm_experts = None, None + + # for AuxLoss Free Router: + if cfg.moe_use_aux_free: + moe_statics = MoEStatics(cfg, layer_idx) + else: + moe_statics = None + return gate, experts, lm_gate, lm_experts, moe_statics + + def forward( + self, + hidden_states: 
torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + attn_mask_start_row_indices: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + token_type_ids: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = False, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + use_cache: Optional[bool] = False, + output_gate_logits=True, # PP model should not output gate logits, + ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]: + """Forward pass through the decoder layer. + + Args: + hidden_states (torch.Tensor): Input tensor [batch_size, seq_len, hidden_size] + attention_mask (Optional[torch.Tensor]): Attention mask tensor + attn_mask_start_row_indices (Optional[torch.Tensor]): Indices for variable length attention + position_ids (Optional[torch.Tensor]): Position indices for rotary embeddings + output_attentions (Optional[bool]): Whether to return attention weights + past_key_value (Optional[Tuple[torch.Tensor]]): Cached key/value states + use_cache (Optional[bool]): Whether to cache key/value states + output_gate_logits (bool): Whether to return MoE gate logits + + Returns: + Union: Various output combinations depending on arguments: + - Base case: Hidden states tensor + - With attention: Tuple of (hidden_states, attention_weights) + - With cache: Tuple of (hidden_states, cached_key_value) + - With MoE: May include gate logits in output tuple + """ + residual = hidden_states + + if token_type_ids is not None: + is_multimodel_token = token_type_ids.any() + has_dense_experts_token = ( + token_type_ids == self.config.moe_dense_experts_token_type_id + ).any() + is_multimodel_token_cpu = is_multimodel_token.cpu() + has_dense_experts_token_cpu = has_dense_experts_token.cpu() + else: + is_multimodel_token_cpu = None + has_dense_experts_token_cpu = None + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + (hidden_states, self_attn_weights, present_key_value, *router_loss_attn) 
class Ernie4_5_PretrainedModel(PreTrainedModel):
    """Base class for ERNIE pretrained models.

    Declares only class-level attributes; it defines no methods of its own.
    """

    # Configuration class paired with this model family.
    config_class = Ernie4_5_MoEConfig
    # NOTE(review): presumably the attribute name/prefix under which the base
    # model is stored in checkpoints -- confirm against `PreTrainedModel`.
    base_model_prefix = "ernie"
    # NOTE(review): presumably tells device-placement utilities to keep each
    # decoder layer on a single device -- confirm against `PreTrainedModel`.
    _no_split_modules = ["Ernie4_5_DecoderLayer"]
    # Disabled here; `Ernie4_5_DecoderLayer` declares the same list itself.
    # _keep_in_fp32_modules = ["mlp.gate", "e_score_correction_bias"]
+ """ + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + self.hidden_size = config.hidden_size + self.config = config + + self.embed_tokens = nn.Embedding( + self.vocab_size, + self.hidden_size, + ) + + self.layers = nn.ModuleList( + [Ernie4_5_DecoderLayer(config, i) for i in range(config.num_hidden_layers)] + ) + Norm = RMSNorm + self.norm = Norm(config) + + self.gradient_checkpointing = False + + def get_input_embeddings(self): + """Get the input embedding layer. + + Returns: + nn.Embedding: The embedding layer for input tokens + """ + return self.embed_tokens + + def set_input_embeddings(self, value): + """Set new input embeddings. + + Args: + value (nn.Embedding): New embedding layer to use + """ + self.embed_tokens = value + + def forward( + self, + input_ids=None, + position_ids=None, + token_type_ids=None, + attention_mask=None, + attn_mask_start_row_indices=None, + inputs_embeds=None, + use_cache=None, + past_key_values=None, + output_attentions=False, + output_hidden_states=None, + return_dict=False, + ): + """Forward pass through the ERNIE model. 
+ + Args: + input_ids (Optional[torch.Tensor]): Input token IDs + position_ids (Optional[torch.Tensor]): Position indices + attention_mask (Optional[torch.Tensor]): Attention mask + attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length attention indices + inputs_embeds (Optional[torch.Tensor]): Precomputed embeddings + use_cache (Optional[bool]): Whether to cache key/value states + past_key_values (Optional[Tuple[Tuple[torch.Tensor]]]): Cached key/value states + output_attentions (Optional[bool]): Whether to output attention weights + output_hidden_states (Optional[bool]): Whether to output all hidden states + return_dict (Optional[bool]): Whether to return dict or tuple + + Returns: + Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]: + Various outputs depending on configuration, including: + - last_hidden_state: Final layer hidden states + - past_key_values: Cached key/value states if use_cache=True + - hidden_states: All hidden states if output_hidden_states=True + - attentions: Attention weights if output_attentions=True + - router_loss: MoE router loss if use_moe=True + - gate_logits: MoE gate logits if use_moe=True + """ + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time" + ) + elif input_ids is not None: + _, seq_length = input_ids.shape + elif inputs_embeds is not None: + _, seq_length, _ = inputs_embeds.shape + else: + raise ValueError( + "You have to specify 
either decoder_input_ids or decoder_inputs_embeds" + ) + + if past_key_values is None: + past_key_values = tuple([None] * len(self.layers)) + + seq_length_with_past = seq_length + cache_length = 0 + if past_key_values[0] is not None: + cache_length = past_key_values[0][0].shape[1] + seq_length_with_past += cache_length + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + inputs_embeds = inputs_embeds.to(self.embed_tokens.weight.dtype) + + hidden_states = inputs_embeds + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + if getattr(self.config, "use_moe", False): + all_router_loss = torch.tensor(0.0).to(device=inputs_embeds.device) + else: + all_router_loss = None + all_gate_logits = () + + for idx, (decoder_layer) in enumerate(self.layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + past_key_value = ( + past_key_values[idx] if past_key_values is not None else None + ) + + layer_outputs = decoder_layer( + hidden_states, + attention_mask, + attn_mask_start_row_indices, + position_ids, + token_type_ids, + output_attentions, + past_key_value, + use_cache, + ) + + if isinstance(layer_outputs, (tuple, list)): + hidden_states = layer_outputs[0] + else: + hidden_states = layer_outputs + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + if self.config.use_moe: + layer_outputs, gate_logits = layer_outputs[:-1], layer_outputs[-1] + all_gate_logits = all_gate_logits + (gate_logits,) + + if past_key_value is not None: + hidden_states = hidden_states[:, -1:, :] + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + + if not 
def parallel_matmul(
    x,
    y,
    bias=None,
    transpose_y=False,
):
    """Multiply ``x`` by the weight ``y`` and optionally add a bias.

    Args:
        x (torch.Tensor): Left operand of the matrix product.
        y (torch.Tensor): Weight matrix (right operand).
        bias (Optional[torch.Tensor]): When given, added in place to the
            product before it is returned.
        transpose_y (bool): When true, ``y.T`` is used as the right operand.

    Returns:
        torch.Tensor: ``x @ y`` (or ``x @ y.T``), plus ``bias`` if provided.
    """
    weight = y.T if transpose_y else y
    logits = torch.matmul(x, weight)
    if bias is None:
        return logits
    # In-place add: the product is a fresh tensor owned by this function.
    logits += bias
    return logits
def calc_multimodal_logits(
    last_hidden_state: torch.Tensor,
    lm_head_weight: torch.Tensor,
    lm_head_bias: torch.Tensor,
    mm_head_weight: torch.Tensor,
    mm_head_bias: torch.Tensor,
    token_type_ids_shifted: torch.Tensor,
    config: Ernie4_5_VLMoEConfig,
):
    """Compute text and image logits, choosing the head by label token type.

    Args:
        last_hidden_state: Hidden states of the last layer. Its first two
            dimensions must equal the shape of ``token_type_ids_shifted``.
        lm_head_weight: Weight of the text head.
        lm_head_bias: Bias of the text head (may be ``None``).
        mm_head_weight: Weight of the image head. When ``None`` only the text
            head exists and every position is projected with it.
        mm_head_bias: Bias of the image head (may be ``None``).
        token_type_ids_shifted: Token types at the *label* positions. In an
            interleaved image/text sequence the last text token predicts an
            image id and vice versa, so the head is selected by the type of
            the label rather than the type of the input token.
        config: Model configuration; only ``use_recompute_loss_fn`` is read.

    Returns:
        tuple: ``(score_text, score_image, None)``.
            - Without an image head: ``score_text`` is the untouched hidden
              states when ``config.use_recompute_loss_fn`` is set, otherwise
              the text logits for all positions; ``score_image`` is ``None``.
            - With an image head: each score covers only the positions of its
              own token type and is ``None`` when no such position exists.
    """
    # TODO: Pass token-type-ids from reader
    assert last_hidden_state.shape[:2] == token_type_ids_shifted.shape, (
        last_hidden_state.shape,
        token_type_ids_shifted.shape,
    )

    if mm_head_weight is None:
        # Text-only model: optionally defer the projection to the loss function.
        if config.use_recompute_loss_fn:
            return last_hidden_state, None, None
        return (
            parallel_matmul(last_hidden_state, lm_head_weight, lm_head_bias),
            None,
            None,
        )

    def _project_selected(position_mask, head_weight, head_bias):
        # Project only the selected positions; skip the matmul when none match.
        if not position_mask.any().item() > 0:
            return None
        return parallel_matmul(
            last_hidden_state[position_mask], head_weight, head_bias
        )

    is_image_label = token_type_ids_shifted == TokenType.image
    is_text_label = token_type_ids_shifted == TokenType.text

    score_text = _project_selected(is_text_label, lm_head_weight, lm_head_bias)
    score_image = _project_selected(is_image_label, mm_head_weight, mm_head_bias)

    return score_text, score_image, None
+ + Args: + config (Ernie4_5_Config): Model configuration containing: + - vocab_size: Size of vocabulary + - hidden_size: Dimension of hidden states + # - tensor_parallel_degree: Degree of tensor parallelism + - tie_word_embeddings: Whether to tie input/output embeddings + - weight_share_add_bias: Whether to add bias when weight sharing + - use_bias: Whether to use bias term + - use_recompute_loss_fn: Whether to defer logits computation to loss function + - use_sparse_head_and_loss_fn: Whether to use sparse head computation + """ + + super(Ernie4_5_MoeLMHead, self).__init__() + self.config = config + if config.tensor_parallel_degree > 1: + vocab_size = config.vocab_size // config.tensor_parallel_degree + else: + vocab_size = config.vocab_size + + if config.tie_word_embeddings: + self.weight = nn.Parameter( + torch.empty( + vocab_size, config.hidden_size, dtype=torch.get_default_dtype() + ) + ) + else: + self.weight = nn.Parameter( + torch.empty( + config.hidden_size, vocab_size, dtype=torch.get_default_dtype() + ) + ) + nn.init.xavier_uniform_(self.weight) + + logger.info( + f"output-weight:{self.weight.shape} tie_word_embeddings:{config.tie_word_embeddings}" + ) + + if config.weight_share_add_bias and config.use_bias: + self.bias = nn.Parameter( + torch.zeros(vocab_size, dtype=torch.get_default_dtype()) + ) + else: + self.bias = None + + # Must set distributed attr for Tensor Parallel ! 
class Ernie4_5_MoeForCausalLM(Ernie4_5_PretrainedModel, GenerationMixin):
    """ERNIE Mixture of Experts (MoE) model for causal language modeling.

    Wraps an `Ernie4_5_Model` decoder (`self.model`) and an
    `Ernie4_5_MoeLMHead` output projection (`self.lm_head`).
    """

    # `lm_head.weight` may be absent from a checkpoint without being reported
    # as missing (presumably because it can be tied to the embeddings -- see
    # the `tie_weights()` call in `__init__`).
    _keys_to_ignore_on_load_missing = [r"lm_head.weight"]

    def __init__(self, config):
        """
        Initializes the ERNIE MoE model for causal language modeling.

        Args:
            config: Model configuration object. It is accessed by attribute
                (`hidden_size`, `initializer_range`), not as a dict.

        Note:
            `config.initializer_range` is overwritten in place on the object
            that was passed in, before the decoder and the head are built.
        """
        super().__init__(config)

        # initialize-trick for big model,
        # see https://github.com/bigscience-workshop/bigscience/blob/master/train/tr11-176B-ml/README.md#std-init
        new_initializer_range = math.sqrt(0.3333 / config.hidden_size)
        logger.info(
            f"change initializer-range from {config.initializer_range} to {new_initializer_range}"
        )
        # Must happen before the sub-modules are constructed so that they see
        # the new value.
        config.initializer_range = new_initializer_range
        self.config = config
        self.model = Ernie4_5_Model(config)
        self.lm_head = Ernie4_5_MoeLMHead(config)

        self.tie_weights()  # maybe weight share

    def get_input_embeddings(self):
        """Returns the input embeddings layer (`self.model.embed_tokens`)."""
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        """Replaces the input embeddings layer (`self.model.embed_tokens`)."""
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        """Returns the output embeddings (LM head)."""
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        """Replaces the output embeddings layer (LM head)."""
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        """Replaces the ERNIE decoder model stored in `self.model`."""
        self.model = decoder

    def get_decoder(self):
        """Get the transformer decoder.

        Returns:
            The decoder module stored in `self.model`.
        """
        return self.model

    def prepare_attention_mask_for_generation(
        self, input_ids, pad_token_id, eos_token_id
    ):
        """Avoid using attention_mask with flash_attn on generation.

        Returns:
            None when `config.use_flash_attention` is set, otherwise whatever
            the parent implementation builds from the given arguments.
        """
        if self.config.use_flash_attention:
            return None
        return super().prepare_attention_mask_for_generation(
            input_ids, pad_token_id, eos_token_id
        )
class VisionAttention(nn.Module):
    """Multi-head self-attention over a packed sequence of vision tokens.

    Several images/frames are concatenated along the sequence axis; a
    block-diagonal mask built from `cu_seqlens` keeps every token from
    attending outside its own segment.
    """

    def __init__(self, dim: int, num_heads: int = 16) -> None:
        """
        Args:
            dim (int): width of the input and output features.
            num_heads (int, optional): number of attention heads. Defaults to 16.
        """
        super().__init__()
        self.num_heads = num_heads
        self.qkv = nn.Linear(dim, dim * 3, bias=True)
        self.proj = nn.Linear(dim, dim)
        # Needed by forward() for the 1/sqrt(head_dim) score scaling.
        self.head_dim = dim // num_heads

    def forward(
        self,
        hidden_states: torch.Tensor,
        cu_seqlens: torch.Tensor,
        rotary_pos_emb: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Apply segment-restricted self-attention.

        Args:
            hidden_states (torch.Tensor): packed tokens of shape [seq_length, dim].
            cu_seqlens (torch.Tensor): cumulative segment boundaries; tokens in
                `[cu_seqlens[i - 1], cu_seqlens[i])` only attend to each other.
            rotary_pos_emb (Optional[torch.Tensor]): rotary frequencies applied
                to queries and keys. When None (the declared default) no rotary
                embedding is applied.

        Returns:
            torch.Tensor: attention output of shape [seq_length, dim].
        """
        seq_length = hidden_states.shape[0]
        qkv = (
            self.qkv(hidden_states)
            .reshape([seq_length, 3, self.num_heads, -1])
            .permute(1, 0, 2, 3)
        )
        q, k, v = qkv.unbind(dim=0)

        # The signature advertises None as a valid value, so honour it instead
        # of failing inside the rotary helper.
        if rotary_pos_emb is not None:
            q = apply_rotary_pos_emb_vision(
                q.unsqueeze(dim=0), rotary_pos_emb
            ).squeeze(dim=0)
            k = apply_rotary_pos_emb_vision(
                k.unsqueeze(dim=0), rotary_pos_emb
            ).squeeze(dim=0)

        # Everything is masked out by default; each segment's square block on
        # the diagonal is then opened up.
        attention_mask = torch.full(
            [1, seq_length, seq_length],
            torch.finfo(q.dtype).min,
            device=q.device,
            dtype=q.dtype,
        )
        # One transfer to Python ints instead of tensor-scalar slicing on
        # every iteration.
        boundaries = cu_seqlens.tolist()
        for start, end in zip(boundaries[:-1], boundaries[1:]):
            attention_mask[..., start:end, start:end] = 0

        # [seq, heads, head_dim] -> [heads, seq, head_dim]
        q = q.transpose(0, 1)
        k = k.transpose(0, 1)
        v = v.transpose(0, 1)
        attn_weights = torch.matmul(q, k.transpose(1, 2)) / math.sqrt(self.head_dim)
        attn_weights = attn_weights + attention_mask
        # Softmax in float32 for numerical stability, then back to the input dtype.
        attn_weights = nn.functional.softmax(
            attn_weights, dim=-1, dtype=torch.float32
        ).to(q.dtype)
        attn_output = torch.matmul(attn_weights, v)
        attn_output = attn_output.transpose(0, 1).reshape(seq_length, -1)
        return self.proj(attn_output)
+ """ + super().__init__() + self.norm1 = nn.LayerNorm(config.embed_dim, eps=1e-6) + self.norm2 = nn.LayerNorm(config.embed_dim, eps=1e-6) + mlp_hidden_dim = int(config.embed_dim * config.mlp_ratio) + + self.attn = VisionAttention(config.embed_dim, num_heads=config.num_heads) + self.mlp = VisionMlp( + dim=config.embed_dim, + hidden_dim=mlp_hidden_dim, + hidden_act=config.hidden_act, + ) + self.config = config + + def forward(self, hidden_states, cu_seqlens, rotary_pos_emb) -> torch.Tensor: + """ + Args: + hidden_states(torch.Tensor): hidden states + cu_seqlens (torch.Tensor): cumulative sequence lengths + rotary_pos_emb: rotary position embedding + + Returns: + torch.Tensor: output tensor + """ + hidden_states = hidden_states + self.attn( + self.norm1(hidden_states), + cu_seqlens=cu_seqlens, + rotary_pos_emb=rotary_pos_emb, + ) + hidden_states = hidden_states + self.mlp(self.norm2(hidden_states)) + return hidden_states + + +class DFNRopeVisionTransformerPreTrainedModel(PreTrainedModel): + """DFNRopeVisionTransformerPreTrainedModel""" + + config_class = DFNRopeVisionTransformerConfig + _tp_plan = {} + + def __init__(self, config) -> None: + """ + Args: + config (dict): model configuration + """ + super().__init__(config) + self.spatial_merge_size = config.spatial_merge_size + + self.patch_embed = PatchEmbed( + patch_size=config.patch_size, + in_channels=config.in_channels, + embed_dim=config.embed_dim, + ) + + head_dim = config.embed_dim // config.num_heads + self.rotary_pos_emb = VisionRotaryEmbedding(head_dim // 2) + + self.blocks = nn.ModuleList( + [DFNRopeVisionBlock(config) for _ in range(config.depth)] + ) + + assert ( + config.hidden_size == config.embed_dim + ), "in DFNRope, vit's config.hidden must be equal to config.embed_dim" + self.ln = nn.LayerNorm(config.hidden_size, eps=1e-6) + + def rot_pos_emb(self, grid_thw, num_pad=0): + """rot_pos_emb + + Args: + grid_thw (torch.Tensor): grid thw of input + + Returns: + torch.Tensor: rotary position embedding + 
""" + pos_ids = [] + grid_hw_array = np.array(grid_thw.cpu(), dtype=np.int64) + for t, h, w in grid_hw_array: + hpos_ids = np.arange(h).reshape([-1, 1]) + hpos_ids = np.tile(hpos_ids, (1, w)) + hpos_ids = hpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + hpos_ids = np.transpose(hpos_ids, (0, 2, 1, 3)) + hpos_ids = hpos_ids.flatten() + + wpos_ids = np.arange(w).reshape([1, -1]) + wpos_ids = np.tile(wpos_ids, (h, 1)) + wpos_ids = wpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + wpos_ids = np.transpose(wpos_ids, (0, 2, 1, 3)) + wpos_ids = wpos_ids.flatten() + + stacked_ids = np.stack([hpos_ids, wpos_ids], axis=-1) + tiled_ids = np.tile(stacked_ids, (t, 1)) + pos_ids.append(tiled_ids) + + pos_ids = np.concatenate(pos_ids, axis=0) + if num_pad > 0: + pos_ids = np.concatenate( + [pos_ids, np.zeros((num_pad, 2), dtype=pos_ids.dtype)] + ) + max_grid_size = np.amax(grid_hw_array[:, 1:]) + rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size) + rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(start_dim=1) + return rotary_pos_emb + + def forward( + self, hidden_states: torch.Tensor, grid_thw: torch.Tensor, num_pad=0 + ) -> torch.Tensor: + """ + Args: + hidden_states (torch.Tensor): input tensor + grid_thw (torch.Tensor): grid thw of input + num_pad (int): number of padding tokens + + Returns: + torch.Tensor: output tensor + """ + hidden_states = self.patch_embed(hidden_states) + + rotary_pos_emb = self.rot_pos_emb(grid_thw, num_pad=num_pad) + rotary_pos_emb = rotary_pos_emb.to(hidden_states.device) + + cu_seqlens = torch.repeat_interleave( + grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0] + ).cumsum(dim=0, dtype=torch.int32) + + if num_pad > 0: + cu_seqlens = F.pad(cu_seqlens, (1, 1), value=0) + cu_seqlens[-1] = cu_seqlens[-2] + num_pad + else: + cu_seqlens = F.pad(cu_seqlens, (1, 0), 
class VariableResolutionResamplerModel(nn.Module):
    """
    Resampler that maps vision features to the language model width while
    supporting variable resolution.

    Neighbouring patches are merged by reshaping (imitating a convolution)
    followed by linear layers: first spatially, then, when
    `config.use_temporal_conv` is set, across pairs of frames.
    """

    def __init__(self, in_dim, out_dim, spatial_conv_size, temporal_conv_size, config):
        """
        Args:
            in_dim: feature width of a single incoming patch.
            out_dim: feature width of the produced tokens.
            spatial_conv_size: side length of the square patch window merged
                into one token.
            temporal_conv_size: number of frames merged into one token
                (forward() only supports 2).
            config: model configuration; `use_temporal_conv` is read here and
                a deep copy with `hidden_size = out_dim` configures the final norm.
        """
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.config = config
        self.spatial_conv_size = spatial_conv_size
        self.temporal_conv_size = temporal_conv_size
        self.use_temporal_conv = config.use_temporal_conv

        # compress 2d conv(picture) to 1d
        self.spatial_dim = self.in_dim * self.spatial_conv_size * self.spatial_conv_size
        # compress 3d conv(video) to 1d
        self.temporal_dim = (
            self.in_dim
            * self.spatial_conv_size
            * self.spatial_conv_size
            * self.temporal_conv_size
        )

        # using unique name space start with "mm_resampler_"
        with UniqueNameGuard("mm_resampler_"):
            self.spatial_linear = nn.Sequential(
                nn.Linear(self.spatial_dim, self.spatial_dim),
                nn.GELU(),
                nn.Linear(self.spatial_dim, self.spatial_dim),
                nn.LayerNorm(self.spatial_dim, eps=1e-6),
            )

            if self.use_temporal_conv:
                self.temporal_linear = nn.Sequential(
                    nn.Linear(self.temporal_dim, self.spatial_dim),
                    nn.GELU(),
                    nn.Linear(self.spatial_dim, self.spatial_dim),
                    nn.LayerNorm(self.spatial_dim, eps=1e-6),
                )

            self.mlp = nn.Linear(self.spatial_dim, self.out_dim)

            # The norm is sized from a private config copy so the caller's
            # config keeps its own hidden_size.
            out_config = deepcopy(config)
            out_config.hidden_size = out_dim
            self.after_norm = RMSNorm(out_config)

    def spatial_conv_reshape(self, x, spatial_conv_size):
        """
        Reshape before the linear layer to imitate a convolution: every
        `spatial_conv_size**2` consecutive rows of the 2-D input become one row.
        """
        _, channels = x.shape  # also enforces a 2-D input
        return x.reshape([-1, channels * (spatial_conv_size**2)])

    @staticmethod
    def _timestep_token_indices(grid_t, tokens_per_frame, batch_offset, odd):
        """Row indices of the tokens of every even (or odd) frame of each item.

        For an item with a single frame the odd selection falls back to frame 0,
        so that frame ends up paired with itself.
        """
        chunks = []
        for num_frames, frame_tokens, start in zip(
            grid_t, tokens_per_frame, batch_offset
        ):
            first = 1 if (odd and num_frames > 1) else 0
            for frame in range(first, num_frames, 2):
                chunks.append(
                    np.arange(
                        start + frame * frame_tokens,
                        start + (frame + 1) * frame_tokens,
                    )
                )
        return np.concatenate(chunks, axis=-1)

    def _pair_timesteps(self, x, grid_thw):
        """Concatenate the features of consecutive frame pairs along the last axis.

        Args:
            x: [S, H] spatially merged tokens, ordered item by item and frame
                by frame.
            grid_thw: [num_items, 3] rows of `(t, h, w)`.

        Returns:
            Tensor whose rows hold an even frame's token followed by the
            matching odd frame's token.
        """
        window = self.spatial_conv_size**2
        grid_np = grid_thw.cpu().numpy()
        grid_t = grid_np[:, 0]
        tokens_per_frame = grid_np[:, 1:].prod(-1) // window
        tokens_per_item = grid_np.prod(-1) // window
        # Exclusive prefix sum: index of the first token of every item.
        batch_offset = tokens_per_item.cumsum() - tokens_per_item

        assert (
            self.temporal_conv_size == 2
        ), f"Hard Code: temporal_conv_size==2, got:{self.temporal_conv_size}"

        # TODO: support any temporal conv size
        halves = []
        for odd in (False, True):
            index = torch.tensor(
                self._timestep_token_indices(
                    grid_t, tokens_per_frame, batch_offset, odd
                )
            ).to(x.device)
            halves.append(torch.index_select(x, dim=0, index=index))
        return torch.concat(halves, dim=-1)

    def forward(self, x, image_mask, token_type_ids, image_type_ids, grid_thw):
        """
        Args:
            x: image features of shape [S, H]; S is ordered as
                [[patch_h*patch_w (row-major traversal)] * patch_time].
            image_mask: not used by this module.
            token_type_ids: not used by this module.
            image_type_ids: must not be None; otherwise unused here.
            grid_thw: [B_image, 3] rows of `(t, h, w)`.

        Returns:
            The resampled tokens after the output projection and norm.
        """
        assert image_type_ids is not None

        x = self.spatial_conv_reshape(x, self.spatial_conv_size)
        x = self.spatial_linear(x)
        if self.use_temporal_conv:
            x = self._pair_timesteps(x, grid_thw)
            x = self.temporal_linear(x)
        x = self.mlp(x)
        x = self.after_norm(x)
        return x
def add_image_preprocess(self, processor):
    """Attach the processor's image processor and precompute its normalisation tensors.

    Args:
        processor: object exposing `image_processor`, which must provide
            `image_mean`, `image_std` (three values each, as they are reshaped
            to [1, 3, 1, 1]) and `rescale_factor`.

    Side effects:
        The image processor is modified in place: it gains
        `image_mean_tensor` / `image_std_tensor` (each channel value repeated
        `patch_size**2` times along the last axis) and its `rescale_factor`
        is replaced by a float32 tensor. It is then stored as
        `self.image_preprocess`.
    """
    logger.info("image preprocess is set")

    image_preprocess = processor.image_processor

    # Per-channel statistics as [1, 3, 1, 1] float32 tensors.
    for stat in ("image_mean", "image_std"):
        as_tensor = torch.tensor(
            getattr(image_preprocess, stat), dtype=torch.float32
        ).reshape([1, 3, 1, 1])
        setattr(image_preprocess, f"{stat}_tensor", as_tensor)

    image_preprocess.rescale_factor = torch.tensor(
        image_preprocess.rescale_factor, dtype=torch.float32
    )

    # Drop the two trailing singleton axes and repeat every channel value once
    # per pixel of a patch.
    for name in ("image_mean_tensor", "image_std_tensor"):
        flat = getattr(image_preprocess, name).squeeze([-2, -1])
        repeats = self.config.vision_config.patch_size**2 * 1
        setattr(image_preprocess, name, flat.repeat_interleave(repeats, -1))

    self.image_preprocess = image_preprocess
def vision_mapping_forward(
    self,
    token_type_ids,
    token_type_ids_w_video,
    input_ids,
    mm_input_ids,
    image_features,
    inputs_embeds,
    image_type_ids,
    grid_thw,
):
    """Resample the vision features and write them into the image-patch slots.

    Args:
        token_type_ids: unused here.
        token_type_ids_w_video: forwarded to the resampler.
        input_ids: token ids; positions equal to `config.im_patch_id` are the
            slots that receive vision features.
        mm_input_ids: unused here.
        image_features: raw vision features handed to `self.model.resampler_model`.
        inputs_embeds: text embeddings; modified in place.
        image_type_ids: forwarded to the resampler.
        grid_thw: forwarded to the resampler.

    Returns:
        `inputs_embeds` (the same tensor object) with the image-patch
        positions overwritten by the resampled features.
    """
    image_mask = input_ids == self.config.im_patch_id
    image_features = self.model.resampler_model(
        image_features,
        image_mask,
        token_type_ids_w_video,
        image_type_ids,
        grid_thw,
    )

    # `Tensor.dim` is a method: the former `image_features.dim == 2` compared
    # the bound method with an int and was never true. A batched [B, N, C]
    # result is flattened to one row per image token.
    if image_features.dim() == 3:
        image_features = image_features.reshape(-1, image_features.shape[-1])

    # Masked assignment needs matching dtype and device.
    image_features = image_features.to(
        device=inputs_embeds.device, dtype=inputs_embeds.dtype
    )
    # Will overwrite the part of `ids==im_patch_id` in `inputs_embeds`
    inputs_embeds[image_mask.to(inputs_embeds.device)] = image_features
    return inputs_embeds
def _post_init(self, original_init, *args, **kwargs):
    """
    Label all multimodal parameters in the model, only head and Embedding.
    Experts parameters are already labeled.

    After delegating to the parent hook, the multimodal head's weight (and
    bias, when present) are tagged with `expert_type = "expert_type_1"`.

    Args:
        original_init: forwarded unchanged to the parent `_post_init`.
        *args: forwarded unchanged to the parent `_post_init`.
        **kwargs: forwarded unchanged to the parent `_post_init`.
    """
    # `super()` already binds `self`; passing it again shifted every argument
    # by one position.
    # NOTE(review): assumes a base class defines `_post_init` -- confirm.
    super()._post_init(original_init, *args, **kwargs)

    # `lm_head` is not guaranteed to own an `mm_head` (this class's __init__
    # installs a plain nn.Linear), so look it up defensively.
    mm_head = getattr(self.lm_head, "mm_head", None)
    if mm_head is None:
        return
    mm_head.weight.expert_type = "expert_type_1"
    if getattr(mm_head, "bias", None) is not None:
        mm_head.bias.expert_type = "expert_type_1"
+ image_attention_mask (Optional[torch.Tensor], optional): Image attention mask. Defaults to None. + token_type_ids (Optional[torch.Tensor], optional): Token type ids. Defaults to None. + image_type_ids (Optional[torch.Tensor], optional): Image type ids. Defaults to None. + grid_thw (Optional[torch.Tensor], optional): Grid thw. Defaults to None. + """ + if grid_thw is not None: + grid_thw = grid_thw[grid_thw > 0].reshape([-1, 3]) + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + image_mask = input_ids == self.config.im_patch_id + + image_rate = image_mask.to(torch.float32).mean() + + if past_key_values is None: + if images is not None: + assert (image_mask).any().item(), ( + image_mask.detach().cpu().numpy().tolist(), + input_ids.detach().cpu().numpy().tolist(), + self.config.im_patch_id, + images.shape, + ) + image_features = self.vision_forward( + images, + image_position_ids, + image_attention_mask, + grid_thw, + ) + else: + image_features = None # no more faking + else: + image_features = None + if token_type_ids is None: + token_type_ids = image_mask.to(torch.int64) + token_type_ids_labels = torch.cat( + [token_type_ids[:, 1:], token_type_ids[:, -1:]], 1 + ) + else: + assert ( + token_type_ids.shape[1] == input_ids.shape[1] + 1 + ), f"token_type:{token_type_ids.shape}, ids:{input_ids.shape}" + token_type_ids_labels = token_type_ids[..., 1:] + + lm_input_ids = input_ids.clone() + mm_input_ids = input_ids.clone() + + inputs_embeds = self.model.embed_tokens(lm_input_ids) + token_type_ids_w_video = token_type_ids[..., :-1].clone() + token_type_ids[token_type_ids == TokenType.video] = TokenType.image + + if images is not None and image_features is not None: + inputs_embeds = self.vision_mapping_forward( + token_type_ids, + token_type_ids_w_video, + input_ids, + mm_input_ids, + image_features, + inputs_embeds, + image_type_ids, + grid_thw, + ) + else: + pass # do nothing, should not hang under 
@staticmethod
def _resolve_prefix_keys(state_keys_base, state_keys_real, ignore_error=False):
    """Map every base key to the loaded ("real") key that carries it.

    A real key matches a base key when it ends with the base key. Real keys
    containing "mm_embed_tokens" are special: they match only base keys that
    also contain "mm_embed_tokens", regardless of suffix. Each real key is
    used at most once.

    Args:
        state_keys_base: iterable of expected key names.
        state_keys_real: iterable of key names found in the loaded state dict.
        ignore_error (bool): when False, an unmatched base key is logged as
            an error; it is skipped either way.

    Returns:
        dict: base key -> matched real key, for the base keys that were found.
    """
    # Work on private sets so the caller's collections stay untouched.
    unclaimed = set(state_keys_real)
    resolved = {}

    for base_key in set(state_keys_base):
        match = None
        for candidate in unclaimed:
            if "mm_embed_tokens" in candidate:
                if "mm_embed_tokens" in base_key:
                    match = candidate
                    break
            elif candidate.endswith(base_key):
                match = candidate
                break

        if match is not None:
            resolved[base_key] = match
            unclaimed.remove(match)
        elif not ignore_error:
            logger.error(f"could not find name {base_key} in loaded state dict!")

    return resolved
@dataclass
class CausalLMOutputWithCrossAttentions(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs.

    Args:
        loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (`torch.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (`tuple(tuple(torch.Tensor))`, *optional*):
            Cached key/value states handed back by the decoder.
            NOTE(review): exact layout is defined by the decoder, not visible here -- confirm.
        hidden_states (`tuple(torch.Tensor)`, *optional*, returned when `output_hidden_states=True`
            is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.Tensor` (one for the output of the embeddings, if the model has an embedding layer,
            + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.Tensor)`, *optional*, returned when `output_attentions=True` is passed or
            when `config.output_attentions=True`):
            Tuple of `torch.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        router_loss (Optional[torch.Tensor]):
            The routing loss computed by the gating network in mixture-of-experts models.
            This is typically the load balancing loss that encourages equal expert utilization.
            None when not using mixture-of-experts routing.
    """

    loss: Optional[torch.Tensor] = None
    # Defaults to None, so the annotation is Optional as well.
    logits: Optional[torch.Tensor] = None
    past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None
    hidden_states: Optional[Tuple[torch.Tensor]] = None
    attentions: Optional[Tuple[torch.Tensor]] = None
    # Documented above as a single tensor, matching the base model output's field.
    router_loss: Optional[torch.Tensor] = None
class Ernie_45T_VLProcessor(ProcessorMixin):
    """
    Turn a rendered multimodal chat prompt into model-ready inputs.

    The processor walks the prompt text, swaps every image / video
    placeholder for the matching number of patch tokens and builds, token by
    token, the ``input_ids``, ``token_type_ids`` and three-component
    ``position_ids``, together with the pixel patches and their grid sizes.
    """

    attributes = ["image_processor", "tokenizer"]
    valid_kwargs = [
        "chat_template",
        "spatial_conv_size",
        "temporal_conv_size",
        "image_min_pixels",
        "image_max_pixels",
        "video_min_pixels",
        "video_max_pixels",
        "video_target_frames",
        "video_frames_sample",
        "video_max_frames",
        "video_min_frames",
        "video_fps",
    ]
    image_processor_class = "AutoImageProcessor"
    tokenizer_class = "AutoTokenizer"

    CLS_TOKEN = "<|begin_of_sentence|>"
    SEP_TOKEN = "<|end_of_sentence|>"
    IMG_START = "<|IMAGE_START|>"
    IMG_END = "<|IMAGE_END|>"
    VID_START = "<|VIDEO_START|>"
    VID_END = "<|VIDEO_END|>"

    def __init__(
        self,
        image_processor=None,
        tokenizer=None,
        chat_template=None,
        spatial_conv_size: int = 2,
        temporal_conv_size: int = 2,
        image_min_pixels: int = 4 * 28 * 28,
        image_max_pixels: int = 6177 * 28 * 28,
        video_min_pixels: int = 299 * 28 * 28,
        video_max_pixels: int = 1196 * 28 * 28,
        video_target_frames: int = -1,
        video_frames_sample: str = "leading",
        video_max_frames: int = 180,
        video_min_frames: int = 16,
        video_fps: int = 2,
        **kwargs,
    ):
        """
        Store the sub-processors and the sampling / sizing configuration.

        Args:
            image_processor: Image processor used for resizing and patching.
            tokenizer: Tokenizer used for text and special tokens.
            chat_template: Chat template forwarded to ``ProcessorMixin``.
            spatial_conv_size: Patches merged per spatial axis into one token.
            temporal_conv_size: Frames merged along time into one token.
            image_min_pixels: Lower pixel bound passed to the image resize.
            image_max_pixels: Upper pixel bound passed to the image resize.
            video_min_pixels: Lower pixel bound passed to the frame resize.
            video_max_pixels: Upper pixel bound passed to the frame resize.
            video_target_frames: Exact frame count to sample; a value <= 0
                means "derive the count from ``video_fps``".
            video_frames_sample: Sampling strategy name handed to the frame
                reader.
            video_max_frames: Upper bound on sampled frames (<= 0 disables).
            video_min_frames: Lower bound on sampled frames (<= 0 disables).
            video_fps: Sampling rate used when no target frame count is set.
            **kwargs: Accepted for compatibility; not used here.
        """
        super().__init__(image_processor, tokenizer, chat_template=chat_template)
        self.tokenizer.ignored_index = -100

        # Convolution sizes for patch aggregation
        self.spatial_conv_size = spatial_conv_size
        self.temporal_conv_size = temporal_conv_size

        # Pixel constraints
        self.image_min_pixels = image_min_pixels
        self.image_max_pixels = image_max_pixels
        self.video_min_pixels = video_min_pixels
        self.video_max_pixels = video_max_pixels

        # Video sampling parameters
        self.target_frames = video_target_frames
        self.frames_sample = video_frames_sample
        self.max_frames = video_max_frames
        self.min_frames = video_min_frames
        self.fps = video_fps

        # Special tokens and IDs
        self.cls_token = self.CLS_TOKEN
        self.sep_token = self.SEP_TOKEN
        self.image_start = self.IMG_START
        self.image_end = self.IMG_END
        self.video_start = self.VID_START
        self.video_end = self.VID_END
        self.image_patch_id = self.tokenizer.convert_tokens_to_ids(
            "<|IMAGE_PLACEHOLDER|>"
        )

        self.token_type_mapping = self._build_token_type_mapping()
        self.is_training = True
        self.role_prefixes = {"system": "", "user": "User: ", "bot": "Assistant: "}

    def _build_token_type_mapping(self) -> Dict[Any, int]:
        """
        Build the lookup from special token to token-type flag.

        Unknown tokens fall back to the text flag. Being a ``defaultdict``,
        the mapping also stores every unknown token that is looked up.
        """
        mapping = defaultdict(lambda: IDS_TYPE_FLAG["text"])
        # NOTE(review): the video start/end markers are flagged as "image" as
        # well; presumably intentional, confirm against the model's handling
        # of token_type_ids.
        for token in (self.IMG_START, self.IMG_END, self.VID_START, self.VID_END):
            mapping[token] = IDS_TYPE_FLAG["image"]
        mapping[self.image_patch_id] = IDS_TYPE_FLAG["image"]
        return mapping

    def train(self) -> None:
        """Enable training mode (sets ``is_training`` to True)."""
        self.is_training = True

    def eval(self) -> None:
        """Enable evaluation mode (sets ``is_training`` to False)."""
        self.is_training = False

    def _download_image(
        self,
        item: Dict,
    ):
        """
        Fetch the image referenced by an ``image_url`` content item.

        The image is resized only when the item carries both ``image_width``
        and ``image_height``.

        Args:
            item: Content item with an ``image_url`` dict holding ``url`` and
                optionally ``image_width`` / ``image_height``.

        Returns:
            The opened (and possibly resized) PIL image.
        """
        url_info = item.get("image_url", {})
        url = url_info.get("url")
        w = url_info.get("image_width", None)
        h = url_info.get("image_height", None)
        data = get_downloadable(url, download_dir=RAW_IMAGE_DIR, save_to_disk=False)

        # The downloader may hand back raw bytes or something PIL can open
        # directly.
        img = Image.open(io.BytesIO(data) if isinstance(data, bytes) else data)
        if w and h:
            img = img.resize((w, h))
        return img

    def _download_video(self, item: Dict):
        """
        Fetch the video referenced by a ``video_url`` content item.

        Frames are sampled by ``_load_and_process_video`` and stacked into one
        array; no resizing happens here.

        Args:
            item: Content item with a ``video_url`` dict holding ``url``; it
                may also carry per-video sampling overrides.

        Returns:
            np.ndarray: RGB frames stacked along axis 0.
        """
        url_info = item.get("video_url", {})
        url = url_info.get("url")

        frames = self._load_and_process_video(url, item)

        pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0)
        return pixel_stack

    def process_vision_info(self, messages: List[Dict[str, Any]]):
        """
        Collect the images and videos referenced by chat messages.

        Args:
            messages: Chat messages. ``content`` may be a list of typed items
                (dicts with a ``type`` key) or anything else, e.g. a plain
                string; non-dict content carries no media and is skipped.

        Returns:
            tuple: ``(images, videos)``: a list of PIL images and a list of
            stacked frame arrays, in order of appearance.
        """
        images = []
        videos = []

        for msg in messages:
            content_items = msg.get("content")
            if not isinstance(content_items, list):
                content_items = [content_items]

            for item in content_items:
                # Text-only messages commonly use a bare string as content;
                # calling .get() on it would raise AttributeError.
                if not isinstance(item, dict):
                    continue
                item_type = item.get("type")
                if item_type == "image_url":
                    images.append(self._download_image(item))
                elif item_type == "video_url":
                    videos.append(self._download_video(item))

        return images, videos

    def __call__(
        self,
        text: List[str],
        images: List[Image.Image],
        videos: List[np.ndarray],
        **kwargs,
    ) -> BatchFeature:
        """
        Convert one rendered prompt plus its media into model inputs.

        Only the first element of ``text`` is processed. A bare string is
        treated as a one-element list.

        Args:
            text: Rendered prompt(s) containing image / video placeholders.
            images: Images consumed in order, one per image placeholder.
            videos: Stacked frame arrays consumed in order, one per video
                placeholder.
            **kwargs: Accepted for compatibility; not used here.

        Returns:
            BatchFeature with ``input_ids``, ``token_type_ids``,
            ``position_ids``, ``images``, ``grid_thw`` and ``image_type_ids``.
            The three image fields are ``None`` when the prompt has no media.
        """
        outputs = {
            "input_ids": [],
            "token_type_ids": [],
            "position_ids": [],
            "images": [],
            "grid_thw": [],
            "image_type_ids": [],
            "cur_position": 0,
            "pic_cnt": 0,
            "video_cnt": 0,
        }
        if isinstance(text, str):
            # Indexing a bare string would silently keep only its first
            # character.
            text = [text]
        prompt = text[0]

        video_placeholder = self.VID_START + "<|video@placeholder|>" + self.VID_END
        image_placeholder = self.IMG_START + "<|image@placeholder|>" + self.IMG_END

        # Every split boundary marks exactly one placeholder, so media is
        # inserted before each segment except the first.
        for video_idx, video_segment in enumerate(prompt.split(video_placeholder)):
            if video_idx > 0:
                self._add_video(videos[outputs["video_cnt"]], outputs)
            for image_idx, text_segment in enumerate(
                video_segment.split(image_placeholder)
            ):
                if image_idx > 0:
                    self._add_image(images[outputs["pic_cnt"]], outputs)
                self._add_text(text_segment, outputs)

        # Drop the bookkeeping counters before packing.
        for key in ["cur_position", "pic_cnt", "video_cnt"]:
            outputs.pop(key, None)

        outputs = self._pack_outputs(outputs)
        for key, value in outputs.items():
            if not isinstance(value, np.ndarray):
                continue
            if key in ["images", "grid_thw"]:
                outputs[key] = torch.tensor(value)
            else:
                # Every other array gets a leading batch dimension of 1.
                outputs[key] = torch.tensor(np.array([value]))

        return BatchFeature(data=outputs)

    def _add_special_token(self, token: Union[str, int], outputs: Dict) -> None:
        """Append one special token (given as string or id) to ``outputs``."""
        token_id = (
            token
            if isinstance(token, int)
            else self.tokenizer.convert_tokens_to_ids(token)
        )
        outputs["input_ids"].append(token_id)
        outputs["token_type_ids"].append(self.token_type_mapping[token])
        pos = outputs["cur_position"]
        # A non-visual token uses the same index on all three position axes.
        outputs["position_ids"].append([pos] * 3)
        outputs["cur_position"] += 1

    def _add_text(self, text: str, outputs: Dict) -> None:
        """Tokenize ``text`` and append its tokens to ``outputs``."""
        tokens = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(text))
        outputs["input_ids"].extend(tokens)
        outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens))

        start = outputs["cur_position"]
        outputs["position_ids"].extend(
            [start + offset] * 3 for offset in range(len(tokens))
        )
        outputs["cur_position"] += len(tokens)

    def _add_image(self, img: Image.Image, outputs: Dict) -> None:
        """
        Append one image to ``outputs``.

        Adds the start marker, the patch tokens with their 3D positions, the
        pixel patches and grid size, then the end marker.
        """
        outputs["pic_cnt"] += 1
        self._add_special_token(self.IMG_START, outputs)

        # Element [1] of the resize result is unpacked as the patch grid
        # (rows, columns).
        patches_h, patches_w = self.image_processor.get_smarted_resize(
            img.height,
            img.width,
            min_pixels=self.image_min_pixels,
            max_pixels=self.image_max_pixels,
        )[1]
        num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2)

        outputs["input_ids"].extend([self.image_patch_id] * num_tokens)
        outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens)

        pos_ids = self._compute_3d_positions(
            1, patches_h, patches_w, outputs["cur_position"]
        )
        outputs["position_ids"].extend(pos_ids)
        # Following tokens continue after the largest index used on any axis.
        outputs["cur_position"] = np.max(pos_ids) + 1

        # Preprocess pixels
        ret = self.image_processor.preprocess(
            images=[img.convert("RGB")],
            do_normalize=False,
            do_rescale=False,
            predetermined_grid_thw=np.array([[patches_h, patches_w]]),
            do_convert_rgb=True,
            input_data_format=ChannelDimension.LAST,
        )
        outputs["images"].append(ret["pixel_values"])
        outputs["grid_thw"].append(ret["image_grid_thw"])
        outputs["image_type_ids"].append(0)

        self._add_special_token(self.IMG_END, outputs)

    def _add_video(self, pixel_stack: np.ndarray, outputs: Dict) -> None:
        """
        Append one video to ``outputs``.

        Args:
            pixel_stack: Frame array indexed as (frame, height, width, ...);
                axes 0, 1 and 2 are read as frame count, height and width.
            outputs: Accumulator dict being built by ``__call__``.
        """
        outputs["video_cnt"] += 1
        self._add_special_token(self.VID_START, outputs)

        patches_h, patches_w = self.image_processor.get_smarted_resize(
            pixel_stack.shape[1],
            pixel_stack.shape[2],
            min_pixels=self.video_min_pixels,
            max_pixels=self.video_max_pixels,
        )[1]
        num_frames = pixel_stack.shape[0]
        num_tokens = (num_frames * patches_h * patches_w) // (
            self.spatial_conv_size**2 * self.temporal_conv_size
        )

        ret = self.image_processor.preprocess(
            images=None,
            videos=pixel_stack,
            do_normalize=False,
            do_rescale=False,
            predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames),
            do_convert_rgb=True,
            input_data_format=ChannelDimension.LAST,
        )
        outputs["images"].append(ret["pixel_values_videos"])
        outputs["grid_thw"].append(ret["video_grid_thw"])
        # One type id per frame (1 = video frame, 0 = still image).
        outputs["image_type_ids"].extend([1] * num_frames)

        outputs["input_ids"].extend([self.image_patch_id] * num_tokens)
        outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens)

        pos_ids = self._compute_3d_positions(
            num_frames, patches_h, patches_w, outputs["cur_position"]
        )
        outputs["position_ids"].extend(pos_ids)
        outputs["cur_position"] = np.max(pos_ids) + 1

        self._add_special_token(self.VID_END, outputs)

    def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]:
        """
        Read a video and return its sampled frames.

        Sampling parameters come from ``item`` when present and from the
        processor defaults otherwise. Each frame is passed through
        ``render_frame_timestamp`` together with its timestamp.

        Returns:
            list: Sampled frames, padded to an even count.
        """
        reader, meta, path = read_video_decord(url, save_to_disk=False)

        video_frame_args = {
            "fps": item.get("fps", self.fps),
            "min_frames": item.get("min_frames", self.min_frames),
            "max_frames": item.get("max_frames", self.max_frames),
            "target_frames": item.get("target_frames", self.target_frames),
            "frames_sample": item.get("frames_sample", self.frames_sample),
        }
        video_frame_args = self._set_video_frame_args(video_frame_args, meta)

        frames_data, _, timestamps = read_frames_decord(
            path,
            reader,
            meta,
            target_frames=video_frame_args["target_frames"],
            target_fps=video_frame_args["fps"],
            frames_sample=video_frame_args["frames_sample"],
            save_to_disk=False,
        )

        frames: List[Image.Image] = [
            render_frame_timestamp(img_array, ts)
            for img_array, ts in zip(frames_data, timestamps)
        ]
        # Ensure even number of frames for temporal conv
        # NOTE(review): hard-codes 2 rather than self.temporal_conv_size;
        # confirm before using another temporal size.
        if len(frames) % 2 != 0:
            frames.append(copy.deepcopy(frames[-1]))
        return frames

    def _set_video_frame_args(self, video_frame_args, video_meta):
        """
        Resolve the final frame-sampling parameters.

        Priority: ``target_frames`` > (``min_frames``, ``max_frames``) > ``fps``.
        With a positive ``target_frames`` the fps must be negative and the
        target must lie within the enabled bounds. Otherwise the frame count
        implied by ``fps`` and the video duration is computed, and when it
        falls outside the bounds the violated bound becomes ``target_frames``
        with ``fps`` set to -1.

        Args:
            video_frame_args: Dict with ``fps``, ``min_frames``,
                ``max_frames`` and ``target_frames``; updated in place.
            video_meta: Video metadata providing ``duration``.

        Returns:
            dict: The same ``video_frame_args`` object.

        Raises:
            ValueError: If the parameters contradict each other.
        """
        fps = video_frame_args["fps"]
        min_frames = video_frame_args["min_frames"]
        max_frames = video_frame_args["max_frames"]
        target_frames = video_frame_args["target_frames"]

        if target_frames > 0:
            if fps >= 0:
                raise ValueError("fps must be negative if target_frames is given")
            if min_frames > 0 and target_frames < min_frames:
                raise ValueError("target_frames must be larger than min_frames")
            if max_frames > 0 and target_frames > max_frames:
                raise ValueError("target_frames must be smaller than max_frames")
            return video_frame_args

        if fps < 0:
            raise ValueError(
                "Must provide either positive target_fps or positive target_frames."
            )
        # First calculate the number of frames extracted under video_fps
        frames_to_extract = int(video_meta["duration"] * fps)
        if min_frames > 0 and max_frames > 0 and min_frames > max_frames:
            raise ValueError("min_frames must be smaller than max_frames")
        # Outside the allowed range, the violated bound becomes the target.
        if min_frames > 0 and frames_to_extract < min_frames:
            video_frame_args["target_frames"] = min_frames
            video_frame_args["fps"] = -1
        if max_frames > 0 and frames_to_extract > max_frames:
            video_frame_args["target_frames"] = max_frames
            video_frame_args["fps"] = -1

        return video_frame_args

    def _compute_3d_positions(
        self, t: int, h: int, w: int, start_idx: int
    ) -> List[List[int]]:
        """
        Build (time, height, width) position triples for a patch grid.

        Args:
            t: Number of frames (1 for a still image).
            h: Patch rows before spatial merging.
            w: Patch columns before spatial merging.
            start_idx: Offset added to every coordinate.

        Returns:
            list: One ``[t, h, w]`` triple per merged token, time-major, then
            rows, then columns.
        """
        # Downsample time if needed
        t_eff = t // self.temporal_conv_size if t != 1 else 1
        gh, gw = h // self.spatial_conv_size, w // self.spatial_conv_size
        time_idx = np.repeat(np.arange(t_eff), gh * gw)
        h_idx = np.tile(np.repeat(np.arange(gh), gw), t_eff)
        w_idx = np.tile(np.arange(gw), t_eff * gh)

        # One vectorised add replaces a Python-level loop over every token.
        return (np.stack([time_idx, h_idx, w_idx], axis=1) + start_idx).tolist()

    def _pack_outputs(self, outs: Dict) -> Dict[str, Any]:
        """Convert the accumulated Python lists in ``outs`` into arrays."""
        # Stack or nullify image-related fields
        if not outs["images"]:
            outs["images"] = None
            outs["grid_thw"] = None
            outs["image_type_ids"] = None
        else:
            outs["images"] = np.vstack(outs["images"])
            outs["grid_thw"] = np.vstack(outs["grid_thw"])
            outs["image_type_ids"] = np.array(outs["image_type_ids"])

        # Convert lists to arrays
        outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64)
        outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64)
        outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64)
        return outs

    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to Ernie4_5_VLTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to Ernie4_5_VLTokenizer's [`~PreTrainedTokenizer.decode`].
        Please refer to the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)

    @property
    def model_input_names(self):
        """Return the tokenizer's input names followed by the image processor's."""
        tokenizer_input_names = self.tokenizer.model_input_names
        image_processor_input_names = self.image_processor.model_input_names
        return list(tokenizer_input_names) + list(image_processor_input_names)
"<|LOC_76|>", "<|LOC_77|>", "<|LOC_78|>", "<|LOC_79|>", "<|LOC_80|>", "<|LOC_81|>", "<|LOC_82|>", "<|LOC_83|>", "<|LOC_84|>", "<|LOC_85|>", "<|LOC_86|>", "<|LOC_87|>", "<|LOC_88|>", "<|LOC_89|>", "<|LOC_90|>", "<|LOC_91|>", "<|LOC_92|>", "<|LOC_93|>", "<|LOC_94|>", "<|LOC_95|>", "<|LOC_96|>", "<|LOC_97|>", "<|LOC_98|>", "<|LOC_99|>", "<|LOC_100|>", "<|LOC_101|>", "<|LOC_102|>", "<|LOC_103|>", "<|LOC_104|>", "<|LOC_105|>", "<|LOC_106|>", "<|LOC_107|>", "<|LOC_108|>", "<|LOC_109|>", "<|LOC_110|>", "<|LOC_111|>", "<|LOC_112|>", "<|LOC_113|>", "<|LOC_114|>", "<|LOC_115|>", "<|LOC_116|>", "<|LOC_117|>", "<|LOC_118|>", "<|LOC_119|>", "<|LOC_120|>", "<|LOC_121|>", "<|LOC_122|>", "<|LOC_123|>", "<|LOC_124|>", "<|LOC_125|>", "<|LOC_126|>", "<|LOC_127|>", "<|LOC_128|>", "<|LOC_129|>", "<|LOC_130|>", "<|LOC_131|>", "<|LOC_132|>", "<|LOC_133|>", "<|LOC_134|>", "<|LOC_135|>", "<|LOC_136|>", "<|LOC_137|>", "<|LOC_138|>", "<|LOC_139|>", "<|LOC_140|>", "<|LOC_141|>", "<|LOC_142|>", "<|LOC_143|>", "<|LOC_144|>", "<|LOC_145|>", "<|LOC_146|>", "<|LOC_147|>", "<|LOC_148|>", "<|LOC_149|>", "<|LOC_150|>", "<|LOC_151|>", "<|LOC_152|>", "<|LOC_153|>", "<|LOC_154|>", "<|LOC_155|>", "<|LOC_156|>", "<|LOC_157|>", "<|LOC_158|>", "<|LOC_159|>", "<|LOC_160|>", "<|LOC_161|>", "<|LOC_162|>", "<|LOC_163|>", "<|LOC_164|>", "<|LOC_165|>", "<|LOC_166|>", "<|LOC_167|>", "<|LOC_168|>", "<|LOC_169|>", "<|LOC_170|>", "<|LOC_171|>", "<|LOC_172|>", "<|LOC_173|>", "<|LOC_174|>", "<|LOC_175|>", "<|LOC_176|>", "<|LOC_177|>", "<|LOC_178|>", "<|LOC_179|>", "<|LOC_180|>", "<|LOC_181|>", "<|LOC_182|>", "<|LOC_183|>", "<|LOC_184|>", "<|LOC_185|>", "<|LOC_186|>", "<|LOC_187|>", "<|LOC_188|>", "<|LOC_189|>", "<|LOC_190|>", "<|LOC_191|>", "<|LOC_192|>", "<|LOC_193|>", "<|LOC_194|>", "<|LOC_195|>", "<|LOC_196|>", "<|LOC_197|>", "<|LOC_198|>", "<|LOC_199|>", "<|LOC_200|>", "<|LOC_201|>", "<|LOC_202|>", "<|LOC_203|>", "<|LOC_204|>", "<|LOC_205|>", "<|LOC_206|>", "<|LOC_207|>", "<|LOC_208|>", "<|LOC_209|>", 
"<|LOC_210|>", "<|LOC_211|>", "<|LOC_212|>", "<|LOC_213|>", "<|LOC_214|>", "<|LOC_215|>", "<|LOC_216|>", "<|LOC_217|>", "<|LOC_218|>", "<|LOC_219|>", "<|LOC_220|>", "<|LOC_221|>", "<|LOC_222|>", "<|LOC_223|>", "<|LOC_224|>", "<|LOC_225|>", "<|LOC_226|>", "<|LOC_227|>", "<|LOC_228|>", "<|LOC_229|>", "<|LOC_230|>", "<|LOC_231|>", "<|LOC_232|>", "<|LOC_233|>", "<|LOC_234|>", "<|LOC_235|>", "<|LOC_236|>", "<|LOC_237|>", "<|LOC_238|>", "<|LOC_239|>", "<|LOC_240|>", "<|LOC_241|>", "<|LOC_242|>", "<|LOC_243|>", "<|LOC_244|>", "<|LOC_245|>", "<|LOC_246|>", "<|LOC_247|>", "<|LOC_248|>", "<|LOC_249|>", "<|LOC_250|>", "<|LOC_251|>", "<|LOC_252|>", "<|LOC_253|>", "<|LOC_254|>", "<|LOC_255|>", "<|LOC_256|>", "<|LOC_257|>", "<|LOC_258|>", "<|LOC_259|>", "<|LOC_260|>", "<|LOC_261|>", "<|LOC_262|>", "<|LOC_263|>", "<|LOC_264|>", "<|LOC_265|>", "<|LOC_266|>", "<|LOC_267|>", "<|LOC_268|>", "<|LOC_269|>", "<|LOC_270|>", "<|LOC_271|>", "<|LOC_272|>", "<|LOC_273|>", "<|LOC_274|>", "<|LOC_275|>", "<|LOC_276|>", "<|LOC_277|>", "<|LOC_278|>", "<|LOC_279|>", "<|LOC_280|>", "<|LOC_281|>", "<|LOC_282|>", "<|LOC_283|>", "<|LOC_284|>", "<|LOC_285|>", "<|LOC_286|>", "<|LOC_287|>", "<|LOC_288|>", "<|LOC_289|>", "<|LOC_290|>", "<|LOC_291|>", "<|LOC_292|>", "<|LOC_293|>", "<|LOC_294|>", "<|LOC_295|>", "<|LOC_296|>", "<|LOC_297|>", "<|LOC_298|>", "<|LOC_299|>", "<|LOC_300|>", "<|LOC_301|>", "<|LOC_302|>", "<|LOC_303|>", "<|LOC_304|>", "<|LOC_305|>", "<|LOC_306|>", "<|LOC_307|>", "<|LOC_308|>", "<|LOC_309|>", "<|LOC_310|>", "<|LOC_311|>", "<|LOC_312|>", "<|LOC_313|>", "<|LOC_314|>", "<|LOC_315|>", "<|LOC_316|>", "<|LOC_317|>", "<|LOC_318|>", "<|LOC_319|>", "<|LOC_320|>", "<|LOC_321|>", "<|LOC_322|>", "<|LOC_323|>", "<|LOC_324|>", "<|LOC_325|>", "<|LOC_326|>", "<|LOC_327|>", "<|LOC_328|>", "<|LOC_329|>", "<|LOC_330|>", "<|LOC_331|>", "<|LOC_332|>", "<|LOC_333|>", "<|LOC_334|>", "<|LOC_335|>", "<|LOC_336|>", "<|LOC_337|>", "<|LOC_338|>", "<|LOC_339|>", "<|LOC_340|>", "<|LOC_341|>", "<|LOC_342|>", 
"<|LOC_343|>", "<|LOC_344|>", "<|LOC_345|>", "<|LOC_346|>", "<|LOC_347|>", "<|LOC_348|>", "<|LOC_349|>", "<|LOC_350|>", "<|LOC_351|>", "<|LOC_352|>", "<|LOC_353|>", "<|LOC_354|>", "<|LOC_355|>", "<|LOC_356|>", "<|LOC_357|>", "<|LOC_358|>", "<|LOC_359|>", "<|LOC_360|>", "<|LOC_361|>", "<|LOC_362|>", "<|LOC_363|>", "<|LOC_364|>", "<|LOC_365|>", "<|LOC_366|>", "<|LOC_367|>", "<|LOC_368|>", "<|LOC_369|>", "<|LOC_370|>", "<|LOC_371|>", "<|LOC_372|>", "<|LOC_373|>", "<|LOC_374|>", "<|LOC_375|>", "<|LOC_376|>", "<|LOC_377|>", "<|LOC_378|>", "<|LOC_379|>", "<|LOC_380|>", "<|LOC_381|>", "<|LOC_382|>", "<|LOC_383|>", "<|LOC_384|>", "<|LOC_385|>", "<|LOC_386|>", "<|LOC_387|>", "<|LOC_388|>", "<|LOC_389|>", "<|LOC_390|>", "<|LOC_391|>", "<|LOC_392|>", "<|LOC_393|>", "<|LOC_394|>", "<|LOC_395|>", "<|LOC_396|>", "<|LOC_397|>", "<|LOC_398|>", "<|LOC_399|>", "<|LOC_400|>", "<|LOC_401|>", "<|LOC_402|>", "<|LOC_403|>", "<|LOC_404|>", "<|LOC_405|>", "<|LOC_406|>", "<|LOC_407|>", "<|LOC_408|>", "<|LOC_409|>", "<|LOC_410|>", "<|LOC_411|>", "<|LOC_412|>", "<|LOC_413|>", "<|LOC_414|>", "<|LOC_415|>", "<|LOC_416|>", "<|LOC_417|>", "<|LOC_418|>", "<|LOC_419|>", "<|LOC_420|>", "<|LOC_421|>", "<|LOC_422|>", "<|LOC_423|>", "<|LOC_424|>", "<|LOC_425|>", "<|LOC_426|>", "<|LOC_427|>", "<|LOC_428|>", "<|LOC_429|>", "<|LOC_430|>", "<|LOC_431|>", "<|LOC_432|>", "<|LOC_433|>", "<|LOC_434|>", "<|LOC_435|>", "<|LOC_436|>", "<|LOC_437|>", "<|LOC_438|>", "<|LOC_439|>", "<|LOC_440|>", "<|LOC_441|>", "<|LOC_442|>", "<|LOC_443|>", "<|LOC_444|>", "<|LOC_445|>", "<|LOC_446|>", "<|LOC_447|>", "<|LOC_448|>", "<|LOC_449|>", "<|LOC_450|>", "<|LOC_451|>", "<|LOC_452|>", "<|LOC_453|>", "<|LOC_454|>", "<|LOC_455|>", "<|LOC_456|>", "<|LOC_457|>", "<|LOC_458|>", "<|LOC_459|>", "<|LOC_460|>", "<|LOC_461|>", "<|LOC_462|>", "<|LOC_463|>", "<|LOC_464|>", "<|LOC_465|>", "<|LOC_466|>", "<|LOC_467|>", "<|LOC_468|>", "<|LOC_469|>", "<|LOC_470|>", "<|LOC_471|>", "<|LOC_472|>", "<|LOC_473|>", "<|LOC_474|>", "<|LOC_475|>", 
"<|LOC_476|>", "<|LOC_477|>", "<|LOC_478|>", "<|LOC_479|>", "<|LOC_480|>", "<|LOC_481|>", "<|LOC_482|>", "<|LOC_483|>", "<|LOC_484|>", "<|LOC_485|>", "<|LOC_486|>", "<|LOC_487|>", "<|LOC_488|>", "<|LOC_489|>", "<|LOC_490|>", "<|LOC_491|>", "<|LOC_492|>", "<|LOC_493|>", "<|LOC_494|>", "<|LOC_495|>", "<|LOC_496|>", "<|LOC_497|>", "<|LOC_498|>", "<|LOC_499|>", "<|LOC_500|>", "<|LOC_501|>", "<|LOC_502|>", "<|LOC_503|>", "<|LOC_504|>", "<|LOC_505|>", "<|LOC_506|>", "<|LOC_507|>", "<|LOC_508|>", "<|LOC_509|>", "<|LOC_510|>", "<|LOC_511|>", "<|LOC_512|>", "<|LOC_513|>", "<|LOC_514|>", "<|LOC_515|>", "<|LOC_516|>", "<|LOC_517|>", "<|LOC_518|>", "<|LOC_519|>", "<|LOC_520|>", "<|LOC_521|>", "<|LOC_522|>", "<|LOC_523|>", "<|LOC_524|>", "<|LOC_525|>", "<|LOC_526|>", "<|LOC_527|>", "<|LOC_528|>", "<|LOC_529|>", "<|LOC_530|>", "<|LOC_531|>", "<|LOC_532|>", "<|LOC_533|>", "<|LOC_534|>", "<|LOC_535|>", "<|LOC_536|>", "<|LOC_537|>", "<|LOC_538|>", "<|LOC_539|>", "<|LOC_540|>", "<|LOC_541|>", "<|LOC_542|>", "<|LOC_543|>", "<|LOC_544|>", "<|LOC_545|>", "<|LOC_546|>", "<|LOC_547|>", "<|LOC_548|>", "<|LOC_549|>", "<|LOC_550|>", "<|LOC_551|>", "<|LOC_552|>", "<|LOC_553|>", "<|LOC_554|>", "<|LOC_555|>", "<|LOC_556|>", "<|LOC_557|>", "<|LOC_558|>", "<|LOC_559|>", "<|LOC_560|>", "<|LOC_561|>", "<|LOC_562|>", "<|LOC_563|>", "<|LOC_564|>", "<|LOC_565|>", "<|LOC_566|>", "<|LOC_567|>", "<|LOC_568|>", "<|LOC_569|>", "<|LOC_570|>", "<|LOC_571|>", "<|LOC_572|>", "<|LOC_573|>", "<|LOC_574|>", "<|LOC_575|>", "<|LOC_576|>", "<|LOC_577|>", "<|LOC_578|>", "<|LOC_579|>", "<|LOC_580|>", "<|LOC_581|>", "<|LOC_582|>", "<|LOC_583|>", "<|LOC_584|>", "<|LOC_585|>", "<|LOC_586|>", "<|LOC_587|>", "<|LOC_588|>", "<|LOC_589|>", "<|LOC_590|>", "<|LOC_591|>", "<|LOC_592|>", "<|LOC_593|>", "<|LOC_594|>", "<|LOC_595|>", "<|LOC_596|>", "<|LOC_597|>", "<|LOC_598|>", "<|LOC_599|>", "<|LOC_600|>", "<|LOC_601|>", "<|LOC_602|>", "<|LOC_603|>", "<|LOC_604|>", "<|LOC_605|>", "<|LOC_606|>", "<|LOC_607|>", "<|LOC_608|>", 
"<|LOC_609|>", "<|LOC_610|>", "<|LOC_611|>", "<|LOC_612|>", "<|LOC_613|>", "<|LOC_614|>", "<|LOC_615|>", "<|LOC_616|>", "<|LOC_617|>", "<|LOC_618|>", "<|LOC_619|>", "<|LOC_620|>", "<|LOC_621|>", "<|LOC_622|>", "<|LOC_623|>", "<|LOC_624|>", "<|LOC_625|>", "<|LOC_626|>", "<|LOC_627|>", "<|LOC_628|>", "<|LOC_629|>", "<|LOC_630|>", "<|LOC_631|>", "<|LOC_632|>", "<|LOC_633|>", "<|LOC_634|>", "<|LOC_635|>", "<|LOC_636|>", "<|LOC_637|>", "<|LOC_638|>", "<|LOC_639|>", "<|LOC_640|>", "<|LOC_641|>", "<|LOC_642|>", "<|LOC_643|>", "<|LOC_644|>", "<|LOC_645|>", "<|LOC_646|>", "<|LOC_647|>", "<|LOC_648|>", "<|LOC_649|>", "<|LOC_650|>", "<|LOC_651|>", "<|LOC_652|>", "<|LOC_653|>", "<|LOC_654|>", "<|LOC_655|>", "<|LOC_656|>", "<|LOC_657|>", "<|LOC_658|>", "<|LOC_659|>", "<|LOC_660|>", "<|LOC_661|>", "<|LOC_662|>", "<|LOC_663|>", "<|LOC_664|>", "<|LOC_665|>", "<|LOC_666|>", "<|LOC_667|>", "<|LOC_668|>", "<|LOC_669|>", "<|LOC_670|>", "<|LOC_671|>", "<|LOC_672|>", "<|LOC_673|>", "<|LOC_674|>", "<|LOC_675|>", "<|LOC_676|>", "<|LOC_677|>", "<|LOC_678|>", "<|LOC_679|>", "<|LOC_680|>", "<|LOC_681|>", "<|LOC_682|>", "<|LOC_683|>", "<|LOC_684|>", "<|LOC_685|>", "<|LOC_686|>", "<|LOC_687|>", "<|LOC_688|>", "<|LOC_689|>", "<|LOC_690|>", "<|LOC_691|>", "<|LOC_692|>", "<|LOC_693|>", "<|LOC_694|>", "<|LOC_695|>", "<|LOC_696|>", "<|LOC_697|>", "<|LOC_698|>", "<|LOC_699|>", "<|LOC_700|>", "<|LOC_701|>", "<|LOC_702|>", "<|LOC_703|>", "<|LOC_704|>", "<|LOC_705|>", "<|LOC_706|>", "<|LOC_707|>", "<|LOC_708|>", "<|LOC_709|>", "<|LOC_710|>", "<|LOC_711|>", "<|LOC_712|>", "<|LOC_713|>", "<|LOC_714|>", "<|LOC_715|>", "<|LOC_716|>", "<|LOC_717|>", "<|LOC_718|>", "<|LOC_719|>", "<|LOC_720|>", "<|LOC_721|>", "<|LOC_722|>", "<|LOC_723|>", "<|LOC_724|>", "<|LOC_725|>", "<|LOC_726|>", "<|LOC_727|>", "<|LOC_728|>", "<|LOC_729|>", "<|LOC_730|>", "<|LOC_731|>", "<|LOC_732|>", "<|LOC_733|>", "<|LOC_734|>", "<|LOC_735|>", "<|LOC_736|>", "<|LOC_737|>", "<|LOC_738|>", "<|LOC_739|>", "<|LOC_740|>", "<|LOC_741|>", 
"<|LOC_742|>", "<|LOC_743|>", "<|LOC_744|>", "<|LOC_745|>", "<|LOC_746|>", "<|LOC_747|>", "<|LOC_748|>", "<|LOC_749|>", "<|LOC_750|>", "<|LOC_751|>", "<|LOC_752|>", "<|LOC_753|>", "<|LOC_754|>", "<|LOC_755|>", "<|LOC_756|>", "<|LOC_757|>", "<|LOC_758|>", "<|LOC_759|>", "<|LOC_760|>", "<|LOC_761|>", "<|LOC_762|>", "<|LOC_763|>", "<|LOC_764|>", "<|LOC_765|>", "<|LOC_766|>", "<|LOC_767|>", "<|LOC_768|>", "<|LOC_769|>", "<|LOC_770|>", "<|LOC_771|>", "<|LOC_772|>", "<|LOC_773|>", "<|LOC_774|>", "<|LOC_775|>", "<|LOC_776|>", "<|LOC_777|>", "<|LOC_778|>", "<|LOC_779|>", "<|LOC_780|>", "<|LOC_781|>", "<|LOC_782|>", "<|LOC_783|>", "<|LOC_784|>", "<|LOC_785|>", "<|LOC_786|>", "<|LOC_787|>", "<|LOC_788|>", "<|LOC_789|>", "<|LOC_790|>", "<|LOC_791|>", "<|LOC_792|>", "<|LOC_793|>", "<|LOC_794|>", "<|LOC_795|>", "<|LOC_796|>", "<|LOC_797|>", "<|LOC_798|>", "<|LOC_799|>", "<|LOC_800|>", "<|LOC_801|>", "<|LOC_802|>", "<|LOC_803|>", "<|LOC_804|>", "<|LOC_805|>", "<|LOC_806|>", "<|LOC_807|>", "<|LOC_808|>", "<|LOC_809|>", "<|LOC_810|>", "<|LOC_811|>", "<|LOC_812|>", "<|LOC_813|>", "<|LOC_814|>", "<|LOC_815|>", "<|LOC_816|>", "<|LOC_817|>", "<|LOC_818|>", "<|LOC_819|>", "<|LOC_820|>", "<|LOC_821|>", "<|LOC_822|>", "<|LOC_823|>", "<|LOC_824|>", "<|LOC_825|>", "<|LOC_826|>", "<|LOC_827|>", "<|LOC_828|>", "<|LOC_829|>", "<|LOC_830|>", "<|LOC_831|>", "<|LOC_832|>", "<|LOC_833|>", "<|LOC_834|>", "<|LOC_835|>", "<|LOC_836|>", "<|LOC_837|>", "<|LOC_838|>", "<|LOC_839|>", "<|LOC_840|>", "<|LOC_841|>", "<|LOC_842|>", "<|LOC_843|>", "<|LOC_844|>", "<|LOC_845|>", "<|LOC_846|>", "<|LOC_847|>", "<|LOC_848|>", "<|LOC_849|>", "<|LOC_850|>", "<|LOC_851|>", "<|LOC_852|>", "<|LOC_853|>", "<|LOC_854|>", "<|LOC_855|>", "<|LOC_856|>", "<|LOC_857|>", "<|LOC_858|>", "<|LOC_859|>", "<|LOC_860|>", "<|LOC_861|>", "<|LOC_862|>", "<|LOC_863|>", "<|LOC_864|>", "<|LOC_865|>", "<|LOC_866|>", "<|LOC_867|>", "<|LOC_868|>", "<|LOC_869|>", "<|LOC_870|>", "<|LOC_871|>", "<|LOC_872|>", "<|LOC_873|>", "<|LOC_874|>", 
"<|LOC_875|>", "<|LOC_876|>", "<|LOC_877|>", "<|LOC_878|>", "<|LOC_879|>", "<|LOC_880|>", "<|LOC_881|>", "<|LOC_882|>", "<|LOC_883|>", "<|LOC_884|>", "<|LOC_885|>", "<|LOC_886|>", "<|LOC_887|>", "<|LOC_888|>", "<|LOC_889|>", "<|LOC_890|>", "<|LOC_891|>", "<|LOC_892|>", "<|LOC_893|>", "<|LOC_894|>", "<|LOC_895|>", "<|LOC_896|>", "<|LOC_897|>", "<|LOC_898|>", "<|LOC_899|>", "<|LOC_900|>", "<|LOC_901|>", "<|LOC_902|>", "<|LOC_903|>", "<|LOC_904|>", "<|LOC_905|>", "<|LOC_906|>", "<|LOC_907|>", "<|LOC_908|>", "<|LOC_909|>", "<|LOC_910|>", "<|LOC_911|>", "<|LOC_912|>", "<|LOC_913|>", "<|LOC_914|>", "<|LOC_915|>", "<|LOC_916|>", "<|LOC_917|>", "<|LOC_918|>", "<|LOC_919|>", "<|LOC_920|>", "<|LOC_921|>", "<|LOC_922|>", "<|LOC_923|>", "<|LOC_924|>", "<|LOC_925|>", "<|LOC_926|>", "<|LOC_927|>", "<|LOC_928|>", "<|LOC_929|>", "<|LOC_930|>", "<|LOC_931|>", "<|LOC_932|>", "<|LOC_933|>", "<|LOC_934|>", "<|LOC_935|>", "<|LOC_936|>", "<|LOC_937|>", "<|LOC_938|>", "<|LOC_939|>", "<|LOC_940|>", "<|LOC_941|>", "<|LOC_942|>", "<|LOC_943|>", "<|LOC_944|>", "<|LOC_945|>", "<|LOC_946|>", "<|LOC_947|>", "<|LOC_948|>", "<|LOC_949|>", "<|LOC_950|>", "<|LOC_951|>", "<|LOC_952|>", "<|LOC_953|>", "<|LOC_954|>", "<|LOC_955|>", "<|LOC_956|>", "<|LOC_957|>", "<|LOC_958|>", "<|LOC_959|>", "<|LOC_960|>", "<|LOC_961|>", "<|LOC_962|>", "<|LOC_963|>", "<|LOC_964|>", "<|LOC_965|>", "<|LOC_966|>", "<|LOC_967|>", "<|LOC_968|>", "<|LOC_969|>", "<|LOC_970|>", "<|LOC_971|>", "<|LOC_972|>", "<|LOC_973|>", "<|LOC_974|>", "<|LOC_975|>", "<|LOC_976|>", "<|LOC_977|>", "<|LOC_978|>", "<|LOC_979|>", "<|LOC_980|>", "<|LOC_981|>", "<|LOC_982|>", "<|LOC_983|>", "<|LOC_984|>", "<|LOC_985|>", "<|LOC_986|>", "<|LOC_987|>", "<|LOC_988|>", "<|LOC_989|>", "<|LOC_990|>", "<|LOC_991|>", "<|LOC_992|>", "<|LOC_993|>", "<|LOC_994|>", "<|LOC_995|>", "<|LOC_996|>", "<|LOC_997|>", "<|LOC_998|>", "<|LOC_999|>", "<|LOC_1000|>", "<|LOC_BEGIN|>", "<|LOC_END|>", "<|LOC_SEP|>", "<|CROP_COL_SEP|>", "<|CROP_ROW_SEP|>", "<|IMAGE_SEP|>", 
"<|IMAGE_START|>", "<|IMAGE_END|>", "<|VIDEO_START|>", "<|VIDEO_END|>", "<|ASR_START|>", "<|ASR_END|>", "<|IMAGE_UNUSE:6|>", "<|IMAGE_UNUSE:7|>", "<|IMAGE_UNUSE:8|>", "<|IMAGE_UNUSE:9|>", "<|IMAGE_UNUSE:10|>", "<|IMAGE_UNUSE:11|>", "<|IMAGE_UNUSE:12|>", "<|IMAGE_UNUSE:13|>", "<|IMAGE_UNUSE:14|>", "<|IMAGE_UNUSE:15|>", "<|IMAGE_UNUSE:16|>", "<|IMAGE_UNUSE:17|>", "<|IMAGE_UNUSE:18|>", "<|IMAGE_UNUSE:19|>", "<|IMAGE_UNUSE:20|>", "<|IMAGE_UNUSE:21|>", "<|IMAGE_UNUSE:22|>", "<|IMAGE_UNUSE:23|>", "<|IMAGE_UNUSE:24|>", "<|IMAGE_UNUSE:25|>", "<|IMAGE_UNUSE:26|>", "<|IMAGE_UNUSE:27|>", "<|IMAGE_UNUSE:28|>", "<|IMAGE_UNUSE:29|>", "<|IMAGE_UNUSE:30|>", "<|IMAGE_UNUSE:31|>", "<|IMAGE_UNUSE:32|>", "<|IMAGE_UNUSE:33|>", "<|IMAGE_UNUSE:34|>", "<|IMAGE_UNUSE:35|>", "<|IMAGE_UNUSE:36|>", "<|IMAGE_UNUSE:37|>", "<|IMAGE_UNUSE:38|>", "<|IMAGE_UNUSE:39|>", "<|IMAGE_UNUSE:40|>", "<|IMAGE_UNUSE:41|>", "<|IMAGE_UNUSE:42|>", "<|IMAGE_UNUSE:43|>", "<|IMAGE_UNUSE:44|>", "<|IMAGE_UNUSE:45|>", "<|IMAGE_UNUSE:46|>", "<|IMAGE_UNUSE:47|>", "<|IMAGE_UNUSE:48|>", "<|IMAGE_UNUSE:49|>", "<|IMAGE_UNUSE:50|>", "<|IMAGE_UNUSE:51|>", "<|IMAGE_UNUSE:52|>", "<|IMAGE_UNUSE:53|>", "<|IMAGE_UNUSE:54|>", "<|IMAGE_UNUSE:55|>", "<|IMAGE_UNUSE:56|>", "<|IMAGE_UNUSE:57|>", "<|IMAGE_UNUSE:58|>", "<|IMAGE_UNUSE:59|>", "<|IMAGE_UNUSE:60|>", "<|IMAGE_UNUSE:61|>", "<|IMAGE_UNUSE:62|>", "<|IMAGE_UNUSE:63|>", "<|IMAGE_UNUSE:64|>", "<|IMAGE_UNUSE:65|>", "<|IMAGE_UNUSE:66|>", "<|IMAGE_UNUSE:67|>", "<|IMAGE_UNUSE:68|>", "<|IMAGE_UNUSE:69|>", "<|IMAGE_UNUSE:70|>", "<|IMAGE_UNUSE:71|>", "<|IMAGE_UNUSE:72|>", "<|IMAGE_UNUSE:73|>", "<|IMAGE_UNUSE:74|>", "<|IMAGE_UNUSE:75|>", "<|IMAGE_UNUSE:76|>", "<|IMAGE_UNUSE:77|>", "<|IMAGE_UNUSE:78|>", "<|IMAGE_UNUSE:79|>", "<|IMAGE_UNUSE:80|>", "<|IMAGE_UNUSE:81|>", "<|IMAGE_UNUSE:82|>", "<|IMAGE_UNUSE:83|>", "<|IMAGE_UNUSE:84|>", "<|IMAGE_UNUSE:85|>", "<|IMAGE_UNUSE:86|>", "<|IMAGE_UNUSE:87|>", "<|IMAGE_UNUSE:88|>", "<|IMAGE_UNUSE:89|>", "<|IMAGE_UNUSE:90|>", "<|IMAGE_UNUSE:91|>", 
"<|IMAGE_UNUSE:92|>", "<|IMAGE_UNUSE:93|>", "<|IMAGE_UNUSE:94|>", "<|IMAGE_UNUSE:95|>", "<|IMAGE_UNUSE:96|>", "<|IMAGE_UNUSE:97|>", "<|IMAGE_UNUSE:98|>", "<|IMAGE_UNUSE:99|>", "<|IMAGE_UNUSE:100|>", "<|IMAGE_UNUSE:101|>", "<|IMAGE_UNUSE:102|>", "<|IMAGE_UNUSE:103|>", "<|IMAGE_UNUSE:104|>", "<|IMAGE_UNUSE:105|>", "<|IMAGE_UNUSE:106|>", "<|IMAGE_UNUSE:107|>", "<|IMAGE_UNUSE:108|>", "<|IMAGE_UNUSE:109|>", "<|IMAGE_UNUSE:110|>", "<|IMAGE_UNUSE:111|>", "<|IMAGE_UNUSE:112|>", "<|IMAGE_UNUSE:113|>", "<|IMAGE_UNUSE:114|>", "<|IMAGE_UNUSE:115|>", "<|IMAGE_UNUSE:116|>", "<|IMAGE_UNUSE:117|>", "<|IMAGE_UNUSE:118|>", "<|IMAGE_UNUSE:119|>", "<|IMAGE_UNUSE:120|>", "<|IMAGE_UNUSE:121|>", "<|IMAGE_UNUSE:122|>", "<|IMAGE_UNUSE:123|>", "<|IMAGE_UNUSE:124|>", "<|IMAGE_UNUSE:125|>", "<|IMAGE_UNUSE:126|>", "<|IMAGE_UNUSE:127|>", "<|IMAGE_UNUSE:128|>", "<|IMAGE_UNUSE:129|>", "<|IMAGE_UNUSE:130|>", "<|IMAGE_UNUSE:131|>", "<|IMAGE_UNUSE:132|>", "<|IMAGE_UNUSE:133|>", "<|IMAGE_UNUSE:134|>", "<|IMAGE_UNUSE:135|>", "<|IMAGE_UNUSE:136|>", "<|IMAGE_UNUSE:137|>", "<|IMAGE_UNUSE:138|>", "<|IMAGE_UNUSE:139|>", "<|IMAGE_UNUSE:140|>", "<|IMAGE_UNUSE:141|>", "<|IMAGE_UNUSE:142|>", "<|IMAGE_UNUSE:143|>", "<|IMAGE_UNUSE:144|>", "<|IMAGE_UNUSE:145|>", "<|IMAGE_UNUSE:146|>", "<|IMAGE_UNUSE:147|>", "<|IMAGE_UNUSE:148|>", "<|IMAGE_UNUSE:149|>", "<|IMAGE_UNUSE:150|>", "<|IMAGE_UNUSE:151|>", "<|IMAGE_UNUSE:152|>", "<|IMAGE_UNUSE:153|>", "<|IMAGE_UNUSE:154|>", "<|IMAGE_UNUSE:155|>", "<|IMAGE_UNUSE:156|>", "<|IMAGE_UNUSE:157|>", "<|IMAGE_UNUSE:158|>", "<|IMAGE_UNUSE:159|>", "<|IMAGE_UNUSE:160|>", "<|IMAGE_UNUSE:161|>", "<|IMAGE_UNUSE:162|>", "<|IMAGE_UNUSE:163|>", "<|IMAGE_UNUSE:164|>", "<|IMAGE_UNUSE:165|>", "<|IMAGE_UNUSE:166|>", "<|IMAGE_UNUSE:167|>", "<|IMAGE_UNUSE:168|>", "<|IMAGE_UNUSE:169|>", "<|IMAGE_UNUSE:170|>", "<|IMAGE_UNUSE:171|>", "<|IMAGE_UNUSE:172|>", "<|IMAGE_UNUSE:173|>", "<|IMAGE_UNUSE:174|>", "<|IMAGE_UNUSE:175|>", "<|IMAGE_UNUSE:176|>", "<|IMAGE_UNUSE:177|>", "<|IMAGE_UNUSE:178|>", 
"<|IMAGE_UNUSE:179|>", "<|IMAGE_UNUSE:180|>", "<|IMAGE_UNUSE:181|>", "<|IMAGE_UNUSE:182|>", "<|IMAGE_UNUSE:183|>", "<|IMAGE_UNUSE:184|>", "<|IMAGE_UNUSE:185|>", "<|IMAGE_UNUSE:186|>", "<|IMAGE_UNUSE:187|>", "<|IMAGE_UNUSE:188|>", "<|IMAGE_UNUSE:189|>", "<|IMAGE_UNUSE:190|>", "<|IMAGE_UNUSE:191|>", "<|IMAGE_UNUSE:192|>", "<|IMAGE_UNUSE:193|>", "<|IMAGE_UNUSE:194|>", "<|IMAGE_UNUSE:195|>", "<|IMAGE_UNUSE:196|>", "<|IMAGE_UNUSE:197|>", "<|IMAGE_UNUSE:198|>", "<|IMAGE_UNUSE:199|>", "<|IMAGE_UNUSE:200|>", "<|IMAGE_UNUSE:201|>", "<|IMAGE_UNUSE:202|>", "<|IMAGE_UNUSE:203|>", "<|IMAGE_UNUSE:204|>", "<|IMAGE_UNUSE:205|>", "<|IMAGE_UNUSE:206|>", "<|IMAGE_UNUSE:207|>", "<|IMAGE_UNUSE:208|>", "<|IMAGE_UNUSE:209|>", "<|IMAGE_UNUSE:210|>", "<|IMAGE_UNUSE:211|>", "<|IMAGE_UNUSE:212|>", "<|IMAGE_UNUSE:213|>", "<|IMAGE_UNUSE:214|>", "<|IMAGE_UNUSE:215|>", "<|IMAGE_UNUSE:216|>", "<|IMAGE_UNUSE:217|>", "<|IMAGE_UNUSE:218|>", "<|IMAGE_UNUSE:219|>", "<|IMAGE_UNUSE:220|>", "<|IMAGE_UNUSE:221|>", "<|IMAGE_UNUSE:222|>", "<|IMAGE_UNUSE:223|>", "<|IMAGE_UNUSE:224|>", "<|IMAGE_UNUSE:225|>", "<|IMAGE_UNUSE:226|>", "<|IMAGE_UNUSE:227|>", "<|IMAGE_UNUSE:228|>", "<|IMAGE_UNUSE:229|>", "<|IMAGE_UNUSE:230|>", "<|IMAGE_UNUSE:231|>", "<|IMAGE_UNUSE:232|>", "<|IMAGE_UNUSE:233|>", "<|IMAGE_UNUSE:234|>", "<|IMAGE_UNUSE:235|>", "<|IMAGE_UNUSE:236|>", "<|IMAGE_UNUSE:237|>", "<|IMAGE_UNUSE:238|>", "<|IMAGE_UNUSE:239|>", "<|IMAGE_UNUSE:240|>", "<|IMAGE_UNUSE:241|>", "<|IMAGE_UNUSE:242|>", "<|IMAGE_UNUSE:243|>", "<|IMAGE_UNUSE:244|>", "<|IMAGE_UNUSE:245|>", "<|IMAGE_UNUSE:246|>", "<|IMAGE_UNUSE:247|>", "<|IMAGE_UNUSE:248|>", "<|IMAGE_UNUSE:249|>", "<|IMAGE_UNUSE:250|>", "<|IMAGE_UNUSE:251|>", "<|IMAGE_UNUSE:252|>", "<|IMAGE_UNUSE:253|>", "<|IMAGE_UNUSE:254|>", "<|IMAGE_UNUSE:255|>", "<|IMAGE_UNUSE:256|>", "<|IMAGE_UNUSE:257|>", "<|IMAGE_UNUSE:258|>", "<|IMAGE_UNUSE:259|>", "<|IMAGE_UNUSE:260|>", "<|IMAGE_UNUSE:261|>", "<|IMAGE_UNUSE:262|>", "<|IMAGE_UNUSE:263|>", "<|IMAGE_UNUSE:264|>", 
"<|IMAGE_UNUSE:265|>", "<|IMAGE_UNUSE:266|>", "<|IMAGE_UNUSE:267|>", "<|IMAGE_UNUSE:268|>", "<|IMAGE_UNUSE:269|>", "<|IMAGE_UNUSE:270|>", "<|IMAGE_UNUSE:271|>", "<|IMAGE_UNUSE:272|>", "<|IMAGE_UNUSE:273|>", "<|IMAGE_UNUSE:274|>", "<|IMAGE_UNUSE:275|>", "<|IMAGE_UNUSE:276|>", "<|IMAGE_UNUSE:277|>", "<|IMAGE_UNUSE:278|>", "<|IMAGE_UNUSE:279|>", "<|IMAGE_UNUSE:280|>", "<|IMAGE_UNUSE:281|>", "<|IMAGE_UNUSE:282|>", "<|IMAGE_UNUSE:283|>", "<|IMAGE_UNUSE:284|>", "<|IMAGE_UNUSE:285|>", "<|IMAGE_UNUSE:286|>", "<|IMAGE_UNUSE:287|>", "<|IMAGE_UNUSE:288|>", "<|IMAGE_UNUSE:289|>", "<|IMAGE_UNUSE:290|>", "<|IMAGE_UNUSE:291|>", "<|IMAGE_UNUSE:292|>", "<|IMAGE_UNUSE:293|>", "<|IMAGE_UNUSE:294|>", "<|IMAGE_UNUSE:295|>", "<|IMAGE_UNUSE:296|>", "<|IMAGE_UNUSE:297|>", "<|IMAGE_UNUSE:298|>", "<|IMAGE_UNUSE:299|>", "<|IMAGE_UNUSE:300|>", "<|IMAGE_UNUSE:301|>", "<|IMAGE_UNUSE:302|>", "<|IMAGE_UNUSE:303|>", "<|IMAGE_UNUSE:304|>", "<|IMAGE_UNUSE:305|>", "<|IMAGE_UNUSE:306|>", "<|IMAGE_UNUSE:307|>", "<|IMAGE_UNUSE:308|>", "<|IMAGE_UNUSE:309|>", "<|IMAGE_UNUSE:310|>", "<|IMAGE_UNUSE:311|>", "<|IMAGE_UNUSE:312|>", "<|IMAGE_UNUSE:313|>", "<|IMAGE_UNUSE:314|>", "<|IMAGE_UNUSE:315|>", "<|IMAGE_UNUSE:316|>", "<|IMAGE_UNUSE:317|>", "<|IMAGE_UNUSE:318|>", "<|IMAGE_UNUSE:319|>", "<|IMAGE_UNUSE:320|>", "<|IMAGE_UNUSE:321|>", "<|IMAGE_UNUSE:322|>", "<|IMAGE_UNUSE:323|>", "<|IMAGE_UNUSE:324|>", "<|IMAGE_UNUSE:325|>", "<|IMAGE_UNUSE:326|>", "<|IMAGE_UNUSE:327|>", "<|IMAGE_UNUSE:328|>", "<|IMAGE_UNUSE:329|>", "<|IMAGE_UNUSE:330|>", "<|IMAGE_UNUSE:331|>", "<|IMAGE_UNUSE:332|>", "<|IMAGE_UNUSE:333|>", "<|IMAGE_UNUSE:334|>", "<|IMAGE_UNUSE:335|>", "<|IMAGE_UNUSE:336|>", "<|IMAGE_UNUSE:337|>", "<|IMAGE_UNUSE:338|>", "<|IMAGE_UNUSE:339|>", "<|IMAGE_UNUSE:340|>", "<|IMAGE_UNUSE:341|>", "<|IMAGE_UNUSE:342|>", "<|IMAGE_UNUSE:343|>", "<|IMAGE_UNUSE:344|>", "<|IMAGE_UNUSE:345|>", "<|IMAGE_UNUSE:346|>", "<|IMAGE_UNUSE:347|>", "<|IMAGE_UNUSE:348|>", "<|IMAGE_UNUSE:349|>", "<|IMAGE_UNUSE:350|>", 
"<|IMAGE_UNUSE:351|>", "<|IMAGE_UNUSE:352|>", "<|IMAGE_UNUSE:353|>", "<|IMAGE_UNUSE:354|>", "<|IMAGE_UNUSE:355|>", "<|IMAGE_UNUSE:356|>", "<|IMAGE_UNUSE:357|>", "<|IMAGE_UNUSE:358|>", "<|IMAGE_UNUSE:359|>", "<|IMAGE_UNUSE:360|>", "<|IMAGE_UNUSE:361|>", "<|IMAGE_UNUSE:362|>", "<|IMAGE_UNUSE:363|>", "<|IMAGE_UNUSE:364|>", "<|IMAGE_UNUSE:365|>", "<|IMAGE_UNUSE:366|>", "<|IMAGE_UNUSE:367|>", "<|IMAGE_UNUSE:368|>", "<|IMAGE_UNUSE:369|>", "<|IMAGE_UNUSE:370|>", "<|IMAGE_UNUSE:371|>", "<|IMAGE_UNUSE:372|>", "<|IMAGE_UNUSE:373|>", "<|IMAGE_UNUSE:374|>", "<|IMAGE_UNUSE:375|>", "<|IMAGE_UNUSE:376|>", "<|IMAGE_UNUSE:377|>", "<|IMAGE_UNUSE:378|>", "<|IMAGE_UNUSE:379|>", "<|IMAGE_UNUSE:380|>", "<|IMAGE_UNUSE:381|>", "<|IMAGE_UNUSE:382|>", "<|IMAGE_UNUSE:383|>", "<|IMAGE_UNUSE:384|>", "<|IMAGE_UNUSE:385|>", "<|IMAGE_UNUSE:386|>", "<|IMAGE_UNUSE:387|>", "<|IMAGE_UNUSE:388|>", "<|IMAGE_UNUSE:389|>", "<|IMAGE_UNUSE:390|>", "<|IMAGE_UNUSE:391|>", "<|IMAGE_UNUSE:392|>", "<|IMAGE_UNUSE:393|>", "<|IMAGE_UNUSE:394|>", "<|IMAGE_UNUSE:395|>", "<|IMAGE_UNUSE:396|>", "<|IMAGE_UNUSE:397|>", "<|IMAGE_UNUSE:398|>", "<|IMAGE_UNUSE:399|>", "<|IMAGE_UNUSE:400|>", "<|IMAGE_UNUSE:401|>", "<|IMAGE_UNUSE:402|>", "<|IMAGE_UNUSE:403|>", "<|IMAGE_UNUSE:404|>", "<|IMAGE_UNUSE:405|>", "<|IMAGE_UNUSE:406|>", "<|IMAGE_UNUSE:407|>", "<|IMAGE_UNUSE:408|>", "<|IMAGE_UNUSE:409|>", "<|IMAGE_UNUSE:410|>", "<|IMAGE_UNUSE:411|>", "<|IMAGE_UNUSE:412|>", "<|IMAGE_UNUSE:413|>", "<|IMAGE_UNUSE:414|>", "<|IMAGE_UNUSE:415|>", "<|IMAGE_UNUSE:416|>", "<|IMAGE_UNUSE:417|>", "<|IMAGE_UNUSE:418|>", "<|IMAGE_UNUSE:419|>", "<|IMAGE_UNUSE:420|>", "<|IMAGE_UNUSE:421|>", "<|IMAGE_UNUSE:422|>", "<|IMAGE_UNUSE:423|>", "<|IMAGE_UNUSE:424|>", "<|IMAGE_UNUSE:425|>", "<|IMAGE_UNUSE:426|>", "<|IMAGE_UNUSE:427|>", "<|IMAGE_UNUSE:428|>", "<|IMAGE_UNUSE:429|>", "<|IMAGE_UNUSE:430|>", "<|IMAGE_UNUSE:431|>", "<|IMAGE_UNUSE:432|>", "<|IMAGE_UNUSE:433|>", "<|IMAGE_UNUSE:434|>", "<|IMAGE_UNUSE:435|>", "<|IMAGE_UNUSE:436|>", 
"<|IMAGE_UNUSE:437|>", "<|IMAGE_UNUSE:438|>", "<|IMAGE_UNUSE:439|>", "<|IMAGE_UNUSE:440|>", "<|IMAGE_UNUSE:441|>", "<|IMAGE_UNUSE:442|>", "<|IMAGE_UNUSE:443|>", "<|IMAGE_UNUSE:444|>", "<|IMAGE_UNUSE:445|>", "<|IMAGE_UNUSE:446|>", "<|IMAGE_UNUSE:447|>", "<|IMAGE_UNUSE:448|>", "<|IMAGE_UNUSE:449|>", "<|IMAGE_UNUSE:450|>", "<|IMAGE_UNUSE:451|>", "<|IMAGE_UNUSE:452|>", "<|IMAGE_UNUSE:453|>", "<|IMAGE_UNUSE:454|>", "<|IMAGE_UNUSE:455|>", "<|IMAGE_UNUSE:456|>", "<|IMAGE_UNUSE:457|>", "<|IMAGE_UNUSE:458|>", "<|IMAGE_UNUSE:459|>", "<|IMAGE_UNUSE:460|>", "<|IMAGE_UNUSE:461|>", "<|IMAGE_UNUSE:462|>", "<|IMAGE_UNUSE:463|>", "<|IMAGE_UNUSE:464|>", "<|IMAGE_UNUSE:465|>", "<|IMAGE_UNUSE:466|>", "<|IMAGE_UNUSE:467|>", "<|IMAGE_UNUSE:468|>", "<|IMAGE_UNUSE:469|>", "<|IMAGE_UNUSE:470|>", "<|IMAGE_UNUSE:471|>", "<|IMAGE_UNUSE:472|>", "<|IMAGE_UNUSE:473|>", "<|IMAGE_UNUSE:474|>", "<|IMAGE_UNUSE:475|>", "<|IMAGE_UNUSE:476|>", "<|IMAGE_UNUSE:477|>", "<|IMAGE_UNUSE:478|>", "<|IMAGE_UNUSE:479|>", "<|IMAGE_UNUSE:480|>", "<|IMAGE_UNUSE:481|>", "<|IMAGE_UNUSE:482|>", "<|IMAGE_UNUSE:483|>", "<|IMAGE_UNUSE:484|>", "<|IMAGE_UNUSE:485|>", "<|IMAGE_UNUSE:486|>", "<|IMAGE_UNUSE:487|>", "<|IMAGE_UNUSE:488|>", "<|IMAGE_UNUSE:489|>", "<|IMAGE_UNUSE:490|>", "<|IMAGE_UNUSE:491|>", "<|IMAGE_UNUSE:492|>", "<|IMAGE_UNUSE:493|>", "<|IMAGE_UNUSE:494|>", "<|IMAGE_UNUSE:495|>", "<|IMAGE_UNUSE:496|>", "<|IMAGE_UNUSE:497|>", "<|IMAGE_UNUSE:498|>", "<|IMAGE_UNUSE:499|>", "<|IMAGE_UNUSE:500|>", "<|IMAGE_UNUSE:501|>", "<|IMAGE_UNUSE:502|>", "<|IMAGE_UNUSE:503|>", "<|IMAGE_UNUSE:504|>", "<|IMAGE_UNUSE:505|>", "<|IMAGE_UNUSE:506|>", "<|IMAGE_UNUSE:507|>", "<|IMAGE_UNUSE:508|>", "<|IMAGE_UNUSE:509|>", "<|IMAGE_UNUSE:510|>", "<|IMAGE_UNUSE:511|>", "<|IMAGE_UNUSE:512|>", "<|IMAGE_UNUSE:513|>", "<|IMAGE_UNUSE:514|>", "<|IMAGE_UNUSE:515|>", "<|IMAGE_UNUSE:516|>", "<|IMAGE_UNUSE:517|>", "<|IMAGE_UNUSE:518|>", "<|IMAGE_UNUSE:519|>", "<|IMAGE_UNUSE:520|>", "<|IMAGE_UNUSE:521|>", "<|IMAGE_UNUSE:522|>", 
"<|IMAGE_UNUSE:523|>", "<|IMAGE_UNUSE:524|>", "<|IMAGE_UNUSE:525|>", "<|IMAGE_UNUSE:526|>", "<|IMAGE_UNUSE:527|>", "<|IMAGE_UNUSE:528|>", "<|IMAGE_UNUSE:529|>", "<|IMAGE_UNUSE:530|>", "<|IMAGE_UNUSE:531|>", "<|IMAGE_UNUSE:532|>", "<|IMAGE_UNUSE:533|>", "<|IMAGE_UNUSE:534|>", "<|IMAGE_UNUSE:535|>", "<|IMAGE_UNUSE:536|>", "<|IMAGE_UNUSE:537|>", "<|IMAGE_UNUSE:538|>", "<|IMAGE_UNUSE:539|>", "<|IMAGE_UNUSE:540|>", "<|IMAGE_UNUSE:541|>", "<|IMAGE_UNUSE:542|>", "<|IMAGE_UNUSE:543|>", "<|IMAGE_UNUSE:544|>", "<|IMAGE_UNUSE:545|>", "<|IMAGE_UNUSE:546|>", "<|IMAGE_UNUSE:547|>", "<|IMAGE_UNUSE:548|>", "<|IMAGE_UNUSE:549|>", "<|IMAGE_UNUSE:550|>", "<|IMAGE_UNUSE:551|>", "<|IMAGE_UNUSE:552|>", "<|IMAGE_UNUSE:553|>", "<|IMAGE_UNUSE:554|>", "<|IMAGE_UNUSE:555|>", "<|IMAGE_UNUSE:556|>", "<|IMAGE_UNUSE:557|>", "<|IMAGE_UNUSE:558|>", "<|IMAGE_UNUSE:559|>", "<|IMAGE_UNUSE:560|>", "<|IMAGE_UNUSE:561|>", "<|IMAGE_UNUSE:562|>", "<|IMAGE_UNUSE:563|>", "<|IMAGE_UNUSE:564|>", "<|IMAGE_UNUSE:565|>", "<|IMAGE_UNUSE:566|>", "<|IMAGE_UNUSE:567|>", "<|IMAGE_UNUSE:568|>", "<|IMAGE_UNUSE:569|>", "<|IMAGE_UNUSE:570|>", "<|IMAGE_UNUSE:571|>", "<|IMAGE_UNUSE:572|>", "<|IMAGE_UNUSE:573|>", "<|IMAGE_UNUSE:574|>", "<|IMAGE_UNUSE:575|>", "<|IMAGE_UNUSE:576|>", "<|IMAGE_UNUSE:577|>", "<|IMAGE_UNUSE:578|>", "<|IMAGE_UNUSE:579|>", "<|IMAGE_UNUSE:580|>", "<|IMAGE_UNUSE:581|>", "<|IMAGE_UNUSE:582|>", "<|IMAGE_UNUSE:583|>", "<|IMAGE_UNUSE:584|>", "<|IMAGE_UNUSE:585|>", "<|IMAGE_UNUSE:586|>", "<|IMAGE_UNUSE:587|>", "<|IMAGE_UNUSE:588|>", "<|IMAGE_UNUSE:589|>", "<|IMAGE_UNUSE:590|>", "<|IMAGE_UNUSE:591|>", "<|IMAGE_UNUSE:592|>", "<|IMAGE_UNUSE:593|>", "<|IMAGE_UNUSE:594|>", "<|IMAGE_UNUSE:595|>", "<|IMAGE_UNUSE:596|>", "<|IMAGE_UNUSE:597|>", "<|IMAGE_UNUSE:598|>", "<|IMAGE_UNUSE:599|>", "<|IMAGE_UNUSE:600|>", "<|IMAGE_UNUSE:601|>", "<|IMAGE_UNUSE:602|>", "<|IMAGE_UNUSE:603|>", "<|IMAGE_UNUSE:604|>", "<|IMAGE_UNUSE:605|>", "<|IMAGE_UNUSE:606|>", "<|IMAGE_UNUSE:607|>", "<|IMAGE_UNUSE:608|>", 
"<|IMAGE_UNUSE:609|>", "<|IMAGE_UNUSE:610|>", "<|IMAGE_UNUSE:611|>", "<|IMAGE_UNUSE:612|>", "<|IMAGE_UNUSE:613|>", "<|IMAGE_UNUSE:614|>", "<|IMAGE_UNUSE:615|>", "<|IMAGE_UNUSE:616|>", "<|IMAGE_UNUSE:617|>", "<|IMAGE_UNUSE:618|>", "<|IMAGE_UNUSE:619|>", "<|IMAGE_UNUSE:620|>", "<|IMAGE_UNUSE:621|>", "<|IMAGE_UNUSE:622|>", "<|IMAGE_UNUSE:623|>", "<|IMAGE_UNUSE:624|>", "<|IMAGE_UNUSE:625|>", "<|IMAGE_UNUSE:626|>", "<|IMAGE_UNUSE:627|>", "<|IMAGE_UNUSE:628|>", "<|IMAGE_UNUSE:629|>", "<|IMAGE_UNUSE:630|>", "<|IMAGE_UNUSE:631|>", "<|IMAGE_UNUSE:632|>", "<|IMAGE_UNUSE:633|>", "<|IMAGE_UNUSE:634|>", "<|IMAGE_UNUSE:635|>", "<|IMAGE_UNUSE:636|>", "<|IMAGE_UNUSE:637|>", "<|IMAGE_UNUSE:638|>", "<|IMAGE_UNUSE:639|>", "<|IMAGE_UNUSE:640|>", "<|IMAGE_UNUSE:641|>", "<|IMAGE_UNUSE:642|>", "<|IMAGE_UNUSE:643|>", "<|IMAGE_UNUSE:644|>", "<|IMAGE_UNUSE:645|>", "<|IMAGE_UNUSE:646|>", "<|IMAGE_UNUSE:647|>", "<|IMAGE_UNUSE:648|>", "<|IMAGE_UNUSE:649|>", "<|IMAGE_UNUSE:650|>", "<|IMAGE_UNUSE:651|>", "<|IMAGE_UNUSE:652|>", "<|IMAGE_UNUSE:653|>", "<|IMAGE_UNUSE:654|>", "<|IMAGE_UNUSE:655|>", "<|IMAGE_UNUSE:656|>", "<|IMAGE_UNUSE:657|>", "<|IMAGE_UNUSE:658|>", "<|IMAGE_UNUSE:659|>", "<|IMAGE_UNUSE:660|>", "<|IMAGE_UNUSE:661|>", "<|IMAGE_UNUSE:662|>", "<|IMAGE_UNUSE:663|>", "<|IMAGE_UNUSE:664|>", "<|IMAGE_UNUSE:665|>", "<|IMAGE_UNUSE:666|>", "<|IMAGE_UNUSE:667|>", "<|IMAGE_UNUSE:668|>", "<|IMAGE_UNUSE:669|>", "<|IMAGE_UNUSE:670|>", "<|IMAGE_UNUSE:671|>", "<|IMAGE_UNUSE:672|>", "<|IMAGE_UNUSE:673|>", "<|IMAGE_UNUSE:674|>", "<|IMAGE_UNUSE:675|>", "<|IMAGE_UNUSE:676|>", "<|IMAGE_UNUSE:677|>", "<|IMAGE_UNUSE:678|>", "<|IMAGE_UNUSE:679|>", "<|IMAGE_UNUSE:680|>", "<|IMAGE_UNUSE:681|>", "<|IMAGE_UNUSE:682|>", "<|IMAGE_UNUSE:683|>", "<|IMAGE_UNUSE:684|>", "<|IMAGE_UNUSE:685|>", "<|IMAGE_UNUSE:686|>", "<|IMAGE_UNUSE:687|>", "<|IMAGE_UNUSE:688|>", "<|IMAGE_UNUSE:689|>", "<|IMAGE_UNUSE:690|>", "<|IMAGE_UNUSE:691|>", "<|IMAGE_UNUSE:692|>", "<|IMAGE_UNUSE:693|>", "<|IMAGE_UNUSE:694|>", 
"<|IMAGE_UNUSE:695|>", "<|IMAGE_UNUSE:696|>", "<|IMAGE_UNUSE:697|>", "<|IMAGE_UNUSE:698|>", "<|IMAGE_UNUSE:699|>", "<|IMAGE_UNUSE:700|>", "<|IMAGE_UNUSE:701|>", "<|IMAGE_UNUSE:702|>", "<|IMAGE_UNUSE:703|>", "<|IMAGE_UNUSE:704|>", "<|IMAGE_UNUSE:705|>", "<|IMAGE_UNUSE:706|>", "<|IMAGE_UNUSE:707|>", "<|IMAGE_UNUSE:708|>", "<|IMAGE_UNUSE:709|>", "<|IMAGE_UNUSE:710|>", "<|IMAGE_UNUSE:711|>", "<|IMAGE_UNUSE:712|>", "<|IMAGE_UNUSE:713|>", "<|IMAGE_UNUSE:714|>", "<|IMAGE_UNUSE:715|>", "<|IMAGE_UNUSE:716|>", "<|IMAGE_UNUSE:717|>", "<|IMAGE_UNUSE:718|>", "<|IMAGE_UNUSE:719|>", "<|IMAGE_UNUSE:720|>", "<|IMAGE_UNUSE:721|>", "<|IMAGE_UNUSE:722|>", "<|IMAGE_UNUSE:723|>", "<|IMAGE_UNUSE:724|>", "<|IMAGE_UNUSE:725|>", "<|IMAGE_UNUSE:726|>", "<|IMAGE_UNUSE:727|>", "<|IMAGE_UNUSE:728|>", "<|IMAGE_UNUSE:729|>", "<|IMAGE_UNUSE:730|>", "<|IMAGE_UNUSE:731|>", "<|IMAGE_UNUSE:732|>", "<|IMAGE_UNUSE:733|>", "<|IMAGE_UNUSE:734|>", "<|IMAGE_UNUSE:735|>", "<|IMAGE_UNUSE:736|>", "<|IMAGE_UNUSE:737|>", "<|IMAGE_UNUSE:738|>", "<|IMAGE_UNUSE:739|>", "<|IMAGE_UNUSE:740|>", "<|IMAGE_UNUSE:741|>", "<|IMAGE_UNUSE:742|>", "<|IMAGE_UNUSE:743|>", "<|IMAGE_UNUSE:744|>", "<|IMAGE_UNUSE:745|>", "<|IMAGE_UNUSE:746|>", "<|IMAGE_UNUSE:747|>", "<|IMAGE_UNUSE:748|>", "<|IMAGE_UNUSE:749|>", "<|IMAGE_UNUSE:750|>", "<|IMAGE_UNUSE:751|>", "<|IMAGE_UNUSE:752|>", "<|IMAGE_UNUSE:753|>", "<|IMAGE_UNUSE:754|>", "<|IMAGE_UNUSE:755|>", "<|IMAGE_UNUSE:756|>", "<|IMAGE_UNUSE:757|>", "<|IMAGE_UNUSE:758|>", "<|IMAGE_UNUSE:759|>", "<|IMAGE_UNUSE:760|>", "<|IMAGE_UNUSE:761|>", "<|IMAGE_UNUSE:762|>", "<|IMAGE_UNUSE:763|>", "<|IMAGE_UNUSE:764|>", "<|IMAGE_UNUSE:765|>", "<|IMAGE_UNUSE:766|>", "<|IMAGE_UNUSE:767|>", "<|IMAGE_UNUSE:768|>", "<|IMAGE_UNUSE:769|>", "<|IMAGE_UNUSE:770|>", "<|IMAGE_UNUSE:771|>", "<|IMAGE_UNUSE:772|>", "<|IMAGE_UNUSE:773|>", "<|IMAGE_UNUSE:774|>", "<|IMAGE_UNUSE:775|>", "<|IMAGE_UNUSE:776|>", "<|IMAGE_UNUSE:777|>", "<|IMAGE_UNUSE:778|>", "<|IMAGE_UNUSE:779|>", "<|IMAGE_UNUSE:780|>", 
"<|IMAGE_UNUSE:781|>", "<|IMAGE_UNUSE:782|>", "<|IMAGE_UNUSE:783|>", "<|IMAGE_UNUSE:784|>", "<|IMAGE_UNUSE:785|>", "<|IMAGE_UNUSE:786|>", "<|IMAGE_UNUSE:787|>", "<|IMAGE_UNUSE:788|>", "<|IMAGE_UNUSE:789|>", "<|IMAGE_UNUSE:790|>", "<|IMAGE_UNUSE:791|>", "<|IMAGE_UNUSE:792|>", "<|IMAGE_UNUSE:793|>", "<|IMAGE_UNUSE:794|>", "<|IMAGE_UNUSE:795|>", "<|IMAGE_UNUSE:796|>", "<|IMAGE_UNUSE:797|>", "<|IMAGE_UNUSE:798|>", "<|IMAGE_UNUSE:799|>", "<|IMAGE_UNUSE:800|>", "<|IMAGE_UNUSE:801|>", "<|IMAGE_UNUSE:802|>", "<|IMAGE_UNUSE:803|>", "<|IMAGE_UNUSE:804|>", "<|IMAGE_UNUSE:805|>", "<|IMAGE_UNUSE:806|>", "<|IMAGE_UNUSE:807|>", "<|IMAGE_UNUSE:808|>", "<|IMAGE_UNUSE:809|>", "<|IMAGE_UNUSE:810|>", "<|IMAGE_UNUSE:811|>", "<|IMAGE_UNUSE:812|>", "<|IMAGE_UNUSE:813|>", "<|IMAGE_UNUSE:814|>", "<|IMAGE_UNUSE:815|>", "<|IMAGE_UNUSE:816|>", "<|IMAGE_UNUSE:817|>", "<|IMAGE_UNUSE:818|>", "<|IMAGE_UNUSE:819|>", "<|IMAGE_UNUSE:820|>", "<|IMAGE_UNUSE:821|>", "<|IMAGE_UNUSE:822|>", "<|IMAGE_UNUSE:823|>", "<|IMAGE_UNUSE:824|>", "<|IMAGE_UNUSE:825|>", "<|IMAGE_UNUSE:826|>", "<|IMAGE_UNUSE:827|>", "<|IMAGE_UNUSE:828|>", "<|IMAGE_UNUSE:829|>", "<|IMAGE_UNUSE:830|>", "<|IMAGE_UNUSE:831|>", "<|IMAGE_UNUSE:832|>", "<|IMAGE_UNUSE:833|>", "<|IMAGE_UNUSE:834|>", "<|IMAGE_UNUSE:835|>", "<|IMAGE_UNUSE:836|>", "<|IMAGE_UNUSE:837|>", "<|IMAGE_UNUSE:838|>", "<|IMAGE_UNUSE:839|>", "<|IMAGE_UNUSE:840|>", "<|IMAGE_UNUSE:841|>", "<|IMAGE_UNUSE:842|>", "<|IMAGE_UNUSE:843|>", "<|IMAGE_UNUSE:844|>", "<|IMAGE_UNUSE:845|>", "<|IMAGE_UNUSE:846|>", "<|IMAGE_UNUSE:847|>", "<|IMAGE_UNUSE:848|>", "<|IMAGE_UNUSE:849|>", "<|IMAGE_UNUSE:850|>", "<|IMAGE_UNUSE:851|>", "<|IMAGE_UNUSE:852|>", "<|IMAGE_UNUSE:853|>", "<|IMAGE_UNUSE:854|>", "<|IMAGE_UNUSE:855|>", "<|IMAGE_UNUSE:856|>", "<|IMAGE_UNUSE:857|>", "<|IMAGE_UNUSE:858|>", "<|IMAGE_UNUSE:859|>", "<|IMAGE_UNUSE:860|>", "<|IMAGE_UNUSE:861|>", "<|IMAGE_UNUSE:862|>", "<|IMAGE_UNUSE:863|>", "<|IMAGE_UNUSE:864|>", "<|IMAGE_UNUSE:865|>", "<|IMAGE_UNUSE:866|>", 
"<|IMAGE_UNUSE:867|>", "<|IMAGE_UNUSE:868|>", "<|IMAGE_UNUSE:869|>", "<|IMAGE_UNUSE:870|>", "<|IMAGE_UNUSE:871|>", "<|IMAGE_UNUSE:872|>", "<|IMAGE_UNUSE:873|>", "<|IMAGE_UNUSE:874|>", "<|IMAGE_UNUSE:875|>", "<|IMAGE_UNUSE:876|>", "<|IMAGE_UNUSE:877|>", "<|IMAGE_UNUSE:878|>", "<|IMAGE_UNUSE:879|>", "<|IMAGE_UNUSE:880|>", "<|IMAGE_UNUSE:881|>", "<|IMAGE_UNUSE:882|>", "<|IMAGE_UNUSE:883|>", "<|IMAGE_UNUSE:884|>", "<|IMAGE_UNUSE:885|>", "<|IMAGE_UNUSE:886|>", "<|IMAGE_UNUSE:887|>", "<|IMAGE_UNUSE:888|>", "<|IMAGE_UNUSE:889|>", "<|IMAGE_UNUSE:890|>", "<|IMAGE_UNUSE:891|>", "<|IMAGE_UNUSE:892|>", "<|IMAGE_UNUSE:893|>", "<|IMAGE_UNUSE:894|>", "<|IMAGE_UNUSE:895|>", "<|IMAGE_UNUSE:896|>", "<|IMAGE_UNUSE:897|>", "<|IMAGE_UNUSE:898|>", "<|IMAGE_UNUSE:899|>", "<|IMAGE_UNUSE:900|>", "<|IMAGE_UNUSE:901|>", "<|IMAGE_UNUSE:902|>", "<|IMAGE_UNUSE:903|>", "<|IMAGE_UNUSE:904|>", "<|IMAGE_UNUSE:905|>", "<|IMAGE_UNUSE:906|>", "<|IMAGE_UNUSE:907|>", "<|IMAGE_UNUSE:908|>", "<|IMAGE_UNUSE:909|>", "<|IMAGE_UNUSE:910|>", "<|IMAGE_UNUSE:911|>", "<|IMAGE_UNUSE:912|>", "<|IMAGE_UNUSE:913|>", "<|IMAGE_UNUSE:914|>", "<|IMAGE_UNUSE:915|>", "<|IMAGE_UNUSE:916|>", "<|IMAGE_UNUSE:917|>", "<|IMAGE_UNUSE:918|>", "<|IMAGE_UNUSE:919|>", "<|IMAGE_UNUSE:920|>", "<|IMAGE_UNUSE:921|>", "<|IMAGE_UNUSE:922|>", "<|IMAGE_UNUSE:923|>", "<|IMAGE_UNUSE:924|>", "<|IMAGE_UNUSE:925|>", "<|IMAGE_UNUSE:926|>", "<|IMAGE_UNUSE:927|>", "<|IMAGE_UNUSE:928|>", "<|IMAGE_UNUSE:929|>", "<|IMAGE_UNUSE:930|>", "<|IMAGE_UNUSE:931|>", "<|IMAGE_UNUSE:932|>", "<|IMAGE_UNUSE:933|>", "<|IMAGE_UNUSE:934|>", "<|IMAGE_UNUSE:935|>", "<|IMAGE_UNUSE:936|>", "<|IMAGE_UNUSE:937|>", "<|IMAGE_UNUSE:938|>", "<|IMAGE_UNUSE:939|>", "<|IMAGE_UNUSE:940|>", "<|IMAGE_UNUSE:941|>", "<|IMAGE_UNUSE:942|>", "<|IMAGE_UNUSE:943|>", "<|IMAGE_UNUSE:944|>", "<|IMAGE_UNUSE:945|>", "<|IMAGE_UNUSE:946|>", "<|IMAGE_UNUSE:947|>", "<|IMAGE_UNUSE:948|>", "<|IMAGE_UNUSE:949|>", "<|IMAGE_UNUSE:950|>", "<|IMAGE_UNUSE:951|>", "<|IMAGE_UNUSE:952|>", 
"<|IMAGE_UNUSE:953|>", "<|IMAGE_UNUSE:954|>", "<|IMAGE_UNUSE:955|>", "<|IMAGE_UNUSE:956|>", "<|IMAGE_UNUSE:957|>", "<|IMAGE_UNUSE:958|>", "<|IMAGE_UNUSE:959|>", "<|IMAGE_UNUSE:960|>", "<|IMAGE_UNUSE:961|>", "<|IMAGE_UNUSE:962|>", "<|IMAGE_UNUSE:963|>", "<|IMAGE_UNUSE:964|>", "<|IMAGE_UNUSE:965|>", "<|IMAGE_UNUSE:966|>", "<|IMAGE_UNUSE:967|>", "<|IMAGE_UNUSE:968|>", "<|IMAGE_UNUSE:969|>", "<|IMAGE_UNUSE:970|>", "<|IMAGE_UNUSE:971|>", "<|IMAGE_UNUSE:972|>", "<|IMAGE_UNUSE:973|>", "<|IMAGE_UNUSE:974|>", "<|IMAGE_UNUSE:975|>", "<|IMAGE_UNUSE:976|>", "<|IMAGE_UNUSE:977|>", "<|IMAGE_UNUSE:978|>", "<|IMAGE_UNUSE:979|>", "<|IMAGE_UNUSE:980|>", "<|IMAGE_UNUSE:981|>", "<|IMAGE_UNUSE:982|>", "<|IMAGE_UNUSE:983|>", "<|IMAGE_UNUSE:984|>", "<|IMAGE_UNUSE:985|>", "<|IMAGE_UNUSE:986|>", "<|IMAGE_UNUSE:987|>", "<|IMAGE_UNUSE:988|>", "<|IMAGE_UNUSE:989|>", "<|IMAGE_UNUSE:990|>", "<|IMAGE_UNUSE:991|>", "<|IMAGE_UNUSE:992|>", "<|IMAGE_UNUSE:993|>", "<|IMAGE_UNUSE:994|>", "<|IMAGE_UNUSE:995|>", "<|IMAGE_UNUSE:996|>", "<|IMAGE_UNUSE:997|>", "<|IMAGE_UNUSE:998|>", "<|IMAGE_UNUSE:999|>", "<|IMAGE_UNUSE:1000|>", "<|IMAGE_UNUSE:1001|>", "<|IMAGE_UNUSE:1002|>", "<|IMAGE_UNUSE:1003|>", "<|IMAGE_UNUSE:1004|>", "<|IMAGE_UNUSE:1005|>", "<|IMAGE_UNUSE:1006|>", "<|IMAGE_UNUSE:1007|>", "<|IMAGE_UNUSE:1008|>", "<|IMAGE_UNUSE:1009|>", "<|IMAGE_UNUSE:1010|>", "<|IMAGE_UNUSE:1011|>", "<|IMAGE_UNUSE:1012|>", "<|IMAGE_UNUSE:1013|>", "<|IMAGE_UNUSE:1014|>", "<|IMAGE_UNUSE:1015|>", "<|IMAGE_UNUSE:1016|>", "<|IMAGE_UNUSE:1017|>", "<|IMAGE_UNUSE:1018|>", "<|IMAGE_UNUSE:1019|>", "<|IMAGE_UNUSE:1020|>", "<|IMAGE_UNUSE:1021|>", "<|IMAGE_UNUSE:1022|>", "<|IMAGE_UNUSE:1023|>", "<|IMAGE_UNUSE:1024|>", "<|IMAGE_UNUSE:1025|>", "<|IMAGE_UNUSE:1026|>", "<|IMAGE_UNUSE:1027|>", "<|IMAGE_UNUSE:1028|>", "<|IMAGE_UNUSE:1029|>", "<|IMAGE_UNUSE:1030|>", "<|IMAGE_UNUSE:1031|>", "<|IMAGE_UNUSE:1032|>", "<|IMAGE_UNUSE:1033|>", "<|IMAGE_UNUSE:1034|>", "<|IMAGE_UNUSE:1035|>", "<|IMAGE_UNUSE:1036|>", "<|IMAGE_UNUSE:1037|>", 
"<|IMAGE_UNUSE:1038|>", "<|STREAMING_BEGIN|>", "<|STREAMING_END|>", "<|STREAMING_TEXT_END|>", "<|AUDIO_UNUSE:0|>", "<|AUDIO_UNUSE:1|>", "<|AUDIO_UNUSE:2|>", "<|AUDIO_UNUSE:3|>", "<|AUDIO_UNUSE:4|>", "<|AUDIO_UNUSE:5|>", "<|AUDIO_UNUSE:6|>", "<|AUDIO_UNUSE:7|>", "<|AUDIO_UNUSE:8|>", "<|AUDIO_UNUSE:9|>", "<|AUDIO_UNUSE:10|>", "<|AUDIO_UNUSE:11|>", "<|AUDIO_UNUSE:12|>", "<|AUDIO_UNUSE:13|>", "<|AUDIO_UNUSE:14|>", "<|AUDIO_UNUSE:15|>", "<|AUDIO_UNUSE:16|>", "<|AUDIO_UNUSE:17|>", "<|AUDIO_UNUSE:18|>", "<|AUDIO_UNUSE:19|>", "<|AUDIO_UNUSE:20|>", "<|AUDIO_UNUSE:21|>", "<|AUDIO_UNUSE:22|>", "<|AUDIO_UNUSE:23|>", "<|AUDIO_UNUSE:24|>", "<|AUDIO_UNUSE:25|>", "<|AUDIO_UNUSE:26|>", "<|AUDIO_UNUSE:27|>", "<|AUDIO_UNUSE:28|>", "<|AUDIO_UNUSE:29|>", "<|AUDIO_UNUSE:30|>", "<|AUDIO_UNUSE:31|>", "<|AUDIO_UNUSE:32|>", "<|AUDIO_UNUSE:33|>", "<|AUDIO_UNUSE:34|>", "<|AUDIO_UNUSE:35|>", "<|AUDIO_UNUSE:36|>", "<|AUDIO_UNUSE:37|>", "<|AUDIO_UNUSE:38|>", "<|AUDIO_UNUSE:39|>", "<|AUDIO_UNUSE:40|>", "<|AUDIO_UNUSE:41|>", "<|AUDIO_UNUSE:42|>", "<|AUDIO_UNUSE:43|>", "<|AUDIO_UNUSE:44|>", "<|AUDIO_UNUSE:45|>", "<|AUDIO_UNUSE:46|>", "<|AUDIO_UNUSE:47|>", "<|AUDIO_UNUSE:48|>", "<|AUDIO_UNUSE:49|>", "<|AUDIO_UNUSE:50|>", "<|AUDIO_UNUSE:51|>", "<|AUDIO_UNUSE:52|>", "<|AUDIO_UNUSE:53|>", "<|AUDIO_UNUSE:54|>", "<|AUDIO_UNUSE:55|>", "<|AUDIO_UNUSE:56|>", "<|AUDIO_UNUSE:57|>", "<|AUDIO_UNUSE:58|>", "<|AUDIO_UNUSE:59|>", "<|AUDIO_UNUSE:60|>", "<|AUDIO_UNUSE:61|>", "<|AUDIO_UNUSE:62|>", "<|AUDIO_UNUSE:63|>", "<|AUDIO_UNUSE:64|>", "<|AUDIO_UNUSE:65|>", "<|AUDIO_UNUSE:66|>", "<|AUDIO_UNUSE:67|>", "<|AUDIO_UNUSE:68|>", "<|AUDIO_UNUSE:69|>", "<|AUDIO_UNUSE:70|>", "<|AUDIO_UNUSE:71|>", "<|AUDIO_UNUSE:72|>", "<|AUDIO_UNUSE:73|>", "<|AUDIO_UNUSE:74|>", "<|AUDIO_UNUSE:75|>", "<|AUDIO_UNUSE:76|>", "<|AUDIO_UNUSE:77|>", "<|AUDIO_UNUSE:78|>", "<|AUDIO_UNUSE:79|>", "<|AUDIO_UNUSE:80|>", "<|AUDIO_UNUSE:81|>", "<|AUDIO_UNUSE:82|>", "<|AUDIO_UNUSE:83|>", "<|AUDIO_UNUSE:84|>", "<|AUDIO_UNUSE:85|>", "<|AUDIO_UNUSE:86|>", 
"<|AUDIO_UNUSE:87|>", "<|AUDIO_UNUSE:88|>", "<|AUDIO_UNUSE:89|>", "<|AUDIO_UNUSE:90|>", "<|AUDIO_UNUSE:91|>", "<|AUDIO_UNUSE:92|>", "<|AUDIO_UNUSE:93|>", "<|AUDIO_UNUSE:94|>", "<|AUDIO_UNUSE:95|>", "<|AUDIO_UNUSE:96|>", "<|AUDIO_UNUSE:97|>", "<|AUDIO_UNUSE:98|>", "<|AUDIO_UNUSE:99|>", "<|AUDIO_UNUSE:100|>", "<|AUDIO_UNUSE:101|>", "<|AUDIO_UNUSE:102|>", "<|AUDIO_UNUSE:103|>", "<|AUDIO_UNUSE:104|>", "<|AUDIO_UNUSE:105|>", "<|AUDIO_UNUSE:106|>", "<|AUDIO_UNUSE:107|>", "<|AUDIO_UNUSE:108|>", "<|AUDIO_UNUSE:109|>", "<|AUDIO_UNUSE:110|>", "<|AUDIO_UNUSE:111|>", "<|AUDIO_UNUSE:112|>", "<|AUDIO_UNUSE:113|>", "<|AUDIO_UNUSE:114|>", "<|AUDIO_UNUSE:115|>", "<|AUDIO_UNUSE:116|>", "<|AUDIO_UNUSE:117|>", "<|AUDIO_UNUSE:118|>", "<|AUDIO_UNUSE:119|>", "<|AUDIO_UNUSE:120|>", "<|AUDIO_UNUSE:121|>", "<|AUDIO_UNUSE:122|>", "<|AUDIO_UNUSE:123|>", "<|AUDIO_UNUSE:124|>", "<|AUDIO_UNUSE:125|>", "<|AUDIO_UNUSE:126|>", "<|AUDIO_UNUSE:127|>", "<|AUDIO_UNUSE:128|>", "<|AUDIO_UNUSE:129|>", "<|AUDIO_UNUSE:130|>", "<|AUDIO_UNUSE:131|>", "<|AUDIO_UNUSE:132|>", "<|AUDIO_UNUSE:133|>", "<|AUDIO_UNUSE:134|>", "<|AUDIO_UNUSE:135|>", "<|AUDIO_UNUSE:136|>", "<|AUDIO_UNUSE:137|>", "<|AUDIO_UNUSE:138|>", "<|AUDIO_UNUSE:139|>", "<|AUDIO_UNUSE:140|>", "<|AUDIO_UNUSE:141|>", "<|AUDIO_UNUSE:142|>", "<|AUDIO_UNUSE:143|>", "<|AUDIO_UNUSE:144|>", "<|AUDIO_UNUSE:145|>", "<|AUDIO_UNUSE:146|>", "<|AUDIO_UNUSE:147|>", "<|AUDIO_UNUSE:148|>", "<|AUDIO_UNUSE:149|>", "<|AUDIO_UNUSE:150|>", "<|AUDIO_UNUSE:151|>", "<|AUDIO_UNUSE:152|>", "<|AUDIO_UNUSE:153|>", "<|AUDIO_UNUSE:154|>", "<|AUDIO_UNUSE:155|>", "<|AUDIO_UNUSE:156|>", "<|AUDIO_UNUSE:157|>", "<|AUDIO_UNUSE:158|>", "<|AUDIO_UNUSE:159|>", "<|AUDIO_UNUSE:160|>", "<|AUDIO_UNUSE:161|>", "<|AUDIO_UNUSE:162|>", "<|AUDIO_UNUSE:163|>", "<|AUDIO_UNUSE:164|>", "<|AUDIO_UNUSE:165|>", "<|AUDIO_UNUSE:166|>", "<|AUDIO_UNUSE:167|>", "<|AUDIO_UNUSE:168|>", "<|AUDIO_UNUSE:169|>", "<|AUDIO_UNUSE:170|>", "<|AUDIO_UNUSE:171|>", "<|AUDIO_UNUSE:172|>", "<|AUDIO_UNUSE:173|>", 
"<|AUDIO_UNUSE:174|>", "<|AUDIO_UNUSE:175|>", "<|AUDIO_UNUSE:176|>", "<|AUDIO_UNUSE:177|>", "<|AUDIO_UNUSE:178|>", "<|AUDIO_UNUSE:179|>", "<|AUDIO_UNUSE:180|>", "<|AUDIO_UNUSE:181|>", "<|AUDIO_UNUSE:182|>", "<|AUDIO_UNUSE:183|>", "<|AUDIO_UNUSE:184|>", "<|AUDIO_UNUSE:185|>", "<|AUDIO_UNUSE:186|>", "<|AUDIO_UNUSE:187|>", "<|AUDIO_UNUSE:188|>", "<|AUDIO_UNUSE:189|>", "<|AUDIO_UNUSE:190|>", "<|AUDIO_UNUSE:191|>", "<|AUDIO_UNUSE:192|>", "<|AUDIO_UNUSE:193|>", "<|AUDIO_UNUSE:194|>", "<|AUDIO_UNUSE:195|>", "<|AUDIO_UNUSE:196|>", "<|AUDIO_UNUSE:197|>", "<|AUDIO_UNUSE:198|>", "<|AUDIO_UNUSE:199|>", "<|AUDIO_UNUSE:200|>", "<|AUDIO_UNUSE:201|>", "<|AUDIO_UNUSE:202|>", "<|AUDIO_UNUSE:203|>", "<|AUDIO_UNUSE:204|>", "<|AUDIO_UNUSE:205|>", "<|AUDIO_UNUSE:206|>", "<|AUDIO_UNUSE:207|>", "<|AUDIO_UNUSE:208|>", "<|AUDIO_UNUSE:209|>", "<|AUDIO_UNUSE:210|>", "<|AUDIO_UNUSE:211|>", "<|AUDIO_UNUSE:212|>", "<|AUDIO_UNUSE:213|>", "<|AUDIO_UNUSE:214|>", "<|AUDIO_UNUSE:215|>", "<|AUDIO_UNUSE:216|>", "<|AUDIO_UNUSE:217|>", "<|AUDIO_UNUSE:218|>", "<|AUDIO_UNUSE:219|>", "<|AUDIO_UNUSE:220|>", "<|AUDIO_UNUSE:221|>", "<|AUDIO_UNUSE:222|>", "<|AUDIO_UNUSE:223|>", "<|AUDIO_UNUSE:224|>", "<|AUDIO_UNUSE:225|>", "<|AUDIO_UNUSE:226|>", "<|AUDIO_UNUSE:227|>", "<|AUDIO_UNUSE:228|>", "<|AUDIO_UNUSE:229|>", "<|AUDIO_UNUSE:230|>", "<|AUDIO_UNUSE:231|>", "<|AUDIO_UNUSE:232|>", "<|AUDIO_UNUSE:233|>", "<|AUDIO_UNUSE:234|>", "<|AUDIO_UNUSE:235|>", "<|AUDIO_UNUSE:236|>", "<|AUDIO_UNUSE:237|>", "<|AUDIO_UNUSE:238|>", "<|AUDIO_UNUSE:239|>", "<|AUDIO_UNUSE:240|>", "<|AUDIO_UNUSE:241|>", "<|AUDIO_UNUSE:242|>", "<|AUDIO_UNUSE:243|>", "<|AUDIO_UNUSE:244|>", "<|AUDIO_UNUSE:245|>", "<|AUDIO_UNUSE:246|>", "<|AUDIO_UNUSE:247|>", "<|AUDIO_UNUSE:248|>", "<|AUDIO_UNUSE:249|>", "<|AUDIO_UNUSE:250|>", "<|AUDIO_UNUSE:251|>", "<|AUDIO_UNUSE:252|>", "<|AUDIO_UNUSE:253|>", "<|AUDIO_UNUSE:254|>", "<|AUDIO_UNUSE:255|>", "<|AUDIO_UNUSE:256|>", "<|AUDIO_UNUSE:257|>", "<|AUDIO_UNUSE:258|>", "<|AUDIO_UNUSE:259|>", 
"<|AUDIO_UNUSE:260|>", "<|AUDIO_UNUSE:261|>", "<|AUDIO_UNUSE:262|>", "<|AUDIO_UNUSE:263|>", "<|AUDIO_UNUSE:264|>", "<|AUDIO_UNUSE:265|>", "<|AUDIO_UNUSE:266|>", "<|AUDIO_UNUSE:267|>", "<|AUDIO_UNUSE:268|>", "<|AUDIO_UNUSE:269|>", "<|AUDIO_UNUSE:270|>", "<|AUDIO_UNUSE:271|>", "<|AUDIO_UNUSE:272|>", "<|AUDIO_UNUSE:273|>", "<|AUDIO_UNUSE:274|>", "<|AUDIO_UNUSE:275|>", "<|AUDIO_UNUSE:276|>", "<|AUDIO_UNUSE:277|>", "<|AUDIO_UNUSE:278|>", "<|AUDIO_UNUSE:279|>", "<|AUDIO_UNUSE:280|>", "<|AUDIO_UNUSE:281|>", "<|AUDIO_UNUSE:282|>", "<|AUDIO_UNUSE:283|>", "<|AUDIO_UNUSE:284|>", "<|AUDIO_UNUSE:285|>", "<|AUDIO_UNUSE:286|>", "<|AUDIO_UNUSE:287|>", "<|AUDIO_UNUSE:288|>", "<|AUDIO_UNUSE:289|>", "<|AUDIO_UNUSE:290|>", "<|AUDIO_UNUSE:291|>", "<|AUDIO_UNUSE:292|>", "<|AUDIO_UNUSE:293|>", "<|AUDIO_UNUSE:294|>", "<|AUDIO_UNUSE:295|>", "<|AUDIO_UNUSE:296|>", "<|AUDIO_UNUSE:297|>", "<|AUDIO_UNUSE:298|>", "<|AUDIO_UNUSE:299|>", "<|AUDIO_UNUSE:300|>", "<|AUDIO_UNUSE:301|>", "<|AUDIO_UNUSE:302|>", "<|AUDIO_UNUSE:303|>", "<|AUDIO_UNUSE:304|>", "<|AUDIO_UNUSE:305|>", "<|AUDIO_UNUSE:306|>", "<|AUDIO_UNUSE:307|>", "<|AUDIO_UNUSE:308|>", "<|AUDIO_UNUSE:309|>", "<|AUDIO_UNUSE:310|>", "<|AUDIO_UNUSE:311|>", "<|AUDIO_UNUSE:312|>", "<|AUDIO_UNUSE:313|>", "<|AUDIO_UNUSE:314|>", "<|AUDIO_UNUSE:315|>", "<|AUDIO_UNUSE:316|>", "<|AUDIO_UNUSE:317|>", "<|AUDIO_UNUSE:318|>", "<|AUDIO_UNUSE:319|>", "<|AUDIO_UNUSE:320|>", "<|AUDIO_UNUSE:321|>", "<|AUDIO_UNUSE:322|>", "<|AUDIO_UNUSE:323|>", "<|AUDIO_UNUSE:324|>", "<|AUDIO_UNUSE:325|>", "<|AUDIO_UNUSE:326|>", "<|AUDIO_UNUSE:327|>", "<|AUDIO_UNUSE:328|>", "<|AUDIO_UNUSE:329|>", "<|AUDIO_UNUSE:330|>", "<|AUDIO_UNUSE:331|>", "<|AUDIO_UNUSE:332|>", "<|AUDIO_UNUSE:333|>", "<|AUDIO_UNUSE:334|>", "<|AUDIO_UNUSE:335|>", "<|AUDIO_UNUSE:336|>", "<|AUDIO_UNUSE:337|>", "<|AUDIO_UNUSE:338|>", "<|AUDIO_UNUSE:339|>", "<|AUDIO_UNUSE:340|>", "<|AUDIO_UNUSE:341|>", "<|AUDIO_UNUSE:342|>", "<|AUDIO_UNUSE:343|>", "<|AUDIO_UNUSE:344|>", "<|AUDIO_UNUSE:345|>", 
"<|AUDIO_UNUSE:346|>", "<|AUDIO_UNUSE:347|>", "<|AUDIO_UNUSE:348|>", "<|AUDIO_UNUSE:349|>", "<|AUDIO_UNUSE:350|>", "<|AUDIO_UNUSE:351|>", "<|AUDIO_UNUSE:352|>", "<|AUDIO_UNUSE:353|>", "<|AUDIO_UNUSE:354|>", "<|AUDIO_UNUSE:355|>", "<|AUDIO_UNUSE:356|>", "<|AUDIO_UNUSE:357|>", "<|AUDIO_UNUSE:358|>", "<|AUDIO_UNUSE:359|>", "<|AUDIO_UNUSE:360|>", "<|AUDIO_UNUSE:361|>", "<|AUDIO_UNUSE:362|>", "<|AUDIO_UNUSE:363|>", "<|AUDIO_UNUSE:364|>", "<|AUDIO_UNUSE:365|>", "<|AUDIO_UNUSE:366|>", "<|AUDIO_UNUSE:367|>", "<|AUDIO_UNUSE:368|>", "<|AUDIO_UNUSE:369|>", "<|AUDIO_UNUSE:370|>", "<|AUDIO_UNUSE:371|>", "<|AUDIO_UNUSE:372|>", "<|AUDIO_UNUSE:373|>", "<|AUDIO_UNUSE:374|>", "<|AUDIO_UNUSE:375|>", "<|AUDIO_UNUSE:376|>", "<|AUDIO_UNUSE:377|>", "<|AUDIO_UNUSE:378|>", "<|AUDIO_UNUSE:379|>", "<|AUDIO_UNUSE:380|>", "<|AUDIO_UNUSE:381|>", "<|AUDIO_UNUSE:382|>", "<|AUDIO_UNUSE:383|>", "<|AUDIO_UNUSE:384|>", "<|AUDIO_UNUSE:385|>", "<|AUDIO_UNUSE:386|>", "<|AUDIO_UNUSE:387|>", "<|AUDIO_UNUSE:388|>", "<|AUDIO_UNUSE:389|>", "<|AUDIO_UNUSE:390|>", "<|AUDIO_UNUSE:391|>", "<|AUDIO_UNUSE:392|>", "<|AUDIO_UNUSE:393|>", "<|AUDIO_UNUSE:394|>", "<|AUDIO_UNUSE:395|>", "<|AUDIO_UNUSE:396|>", "<|AUDIO_UNUSE:397|>", "<|AUDIO_UNUSE:398|>", "<|AUDIO_UNUSE:399|>", "<|AUDIO_UNUSE:400|>", "<|AUDIO_UNUSE:401|>", "<|AUDIO_UNUSE:402|>", "<|AUDIO_UNUSE:403|>", "<|AUDIO_UNUSE:404|>", "<|AUDIO_UNUSE:405|>", "<|AUDIO_UNUSE:406|>", "<|AUDIO_UNUSE:407|>", "<|AUDIO_UNUSE:408|>", "<|AUDIO_UNUSE:409|>", "<|AUDIO_UNUSE:410|>", "<|AUDIO_UNUSE:411|>", "<|AUDIO_UNUSE:412|>", "<|AUDIO_UNUSE:413|>", "<|AUDIO_UNUSE:414|>", "<|AUDIO_UNUSE:415|>", "<|AUDIO_UNUSE:416|>", "<|AUDIO_UNUSE:417|>", "<|AUDIO_UNUSE:418|>", "<|AUDIO_UNUSE:419|>", "<|AUDIO_UNUSE:420|>", "<|AUDIO_UNUSE:421|>", "<|AUDIO_UNUSE:422|>", "<|AUDIO_UNUSE:423|>", "<|AUDIO_UNUSE:424|>", "<|AUDIO_UNUSE:425|>", "<|AUDIO_UNUSE:426|>", "<|AUDIO_UNUSE:427|>", "<|AUDIO_UNUSE:428|>", "<|AUDIO_UNUSE:429|>", "<|AUDIO_UNUSE:430|>", "<|AUDIO_UNUSE:431|>", 
"<|AUDIO_UNUSE:432|>", "<|AUDIO_UNUSE:433|>", "<|AUDIO_UNUSE:434|>", "<|AUDIO_UNUSE:435|>", "<|AUDIO_UNUSE:436|>", "<|AUDIO_UNUSE:437|>", "<|AUDIO_UNUSE:438|>", "<|AUDIO_UNUSE:439|>", "<|AUDIO_UNUSE:440|>", "<|AUDIO_UNUSE:441|>", "<|AUDIO_UNUSE:442|>", "<|AUDIO_UNUSE:443|>", "<|AUDIO_UNUSE:444|>", "<|AUDIO_UNUSE:445|>", "<|AUDIO_UNUSE:446|>", "<|AUDIO_UNUSE:447|>", "<|AUDIO_UNUSE:448|>", "<|AUDIO_UNUSE:449|>", "<|AUDIO_UNUSE:450|>", "<|AUDIO_UNUSE:451|>", "<|AUDIO_UNUSE:452|>", "<|AUDIO_UNUSE:453|>", "<|AUDIO_UNUSE:454|>", "<|AUDIO_UNUSE:455|>", "<|AUDIO_UNUSE:456|>", "<|AUDIO_UNUSE:457|>", "<|AUDIO_UNUSE:458|>", "<|AUDIO_UNUSE:459|>", "<|AUDIO_UNUSE:460|>", "<|AUDIO_UNUSE:461|>", "<|AUDIO_UNUSE:462|>", "<|AUDIO_UNUSE:463|>", "<|AUDIO_UNUSE:464|>", "<|AUDIO_UNUSE:465|>", "<|AUDIO_UNUSE:466|>", "<|AUDIO_UNUSE:467|>", "<|AUDIO_UNUSE:468|>", "<|AUDIO_UNUSE:469|>", "<|AUDIO_UNUSE:470|>", "<|AUDIO_UNUSE:471|>", "<|AUDIO_UNUSE:472|>", "<|AUDIO_UNUSE:473|>", "<|AUDIO_UNUSE:474|>", "<|AUDIO_UNUSE:475|>", "<|AUDIO_UNUSE:476|>", "<|AUDIO_UNUSE:477|>", "<|AUDIO_UNUSE:478|>", "<|AUDIO_UNUSE:479|>", "<|AUDIO_UNUSE:480|>", "<|AUDIO_UNUSE:481|>", "<|AUDIO_UNUSE:482|>", "<|AUDIO_UNUSE:483|>", "<|AUDIO_UNUSE:484|>", "<|AUDIO_UNUSE:485|>", "<|AUDIO_UNUSE:486|>", "<|AUDIO_UNUSE:487|>", "<|AUDIO_UNUSE:488|>", "<|AUDIO_UNUSE:489|>", "<|AUDIO_UNUSE:490|>", "<|AUDIO_UNUSE:491|>", "<|AUDIO_UNUSE:492|>", "<|AUDIO_UNUSE:493|>", "<|AUDIO_UNUSE:494|>", "<|AUDIO_UNUSE:495|>", "<|AUDIO_UNUSE:496|>", "<|AUDIO_UNUSE:497|>", "<|AUDIO_UNUSE:498|>", "<|AUDIO_UNUSE:499|>", "<|AUDIO_UNUSE:500|>", "<|AUDIO_UNUSE:501|>", "<|AUDIO_UNUSE:502|>", "<|AUDIO_UNUSE:503|>", "<|AUDIO_UNUSE:504|>", "<|AUDIO_UNUSE:505|>", "<|AUDIO_UNUSE:506|>", "<|AUDIO_UNUSE:507|>", "<|AUDIO_UNUSE:508|>", "<|AUDIO_UNUSE:509|>", "<|AUDIO_UNUSE:510|>", "<|AUDIO_UNUSE:511|>", "<|AUDIO_UNUSE:512|>", "<|AUDIO_UNUSE:513|>", "<|AUDIO_UNUSE:514|>", "<|AUDIO_UNUSE:515|>", "<|AUDIO_UNUSE:516|>", "<|AUDIO_UNUSE:517|>", 
"<|AUDIO_UNUSE:518|>", "<|AUDIO_UNUSE:519|>", "<|AUDIO_UNUSE:520|>", "<|AUDIO_UNUSE:521|>", "<|AUDIO_UNUSE:522|>", "<|AUDIO_UNUSE:523|>", "<|AUDIO_UNUSE:524|>", "<|AUDIO_UNUSE:525|>", "<|AUDIO_UNUSE:526|>", "<|AUDIO_UNUSE:527|>", "<|AUDIO_UNUSE:528|>", "<|AUDIO_UNUSE:529|>", "<|AUDIO_UNUSE:530|>", "<|AUDIO_UNUSE:531|>", "<|AUDIO_UNUSE:532|>", "<|AUDIO_UNUSE:533|>", "<|AUDIO_UNUSE:534|>", "<|AUDIO_UNUSE:535|>", "<|AUDIO_UNUSE:536|>", "<|AUDIO_UNUSE:537|>", "<|AUDIO_UNUSE:538|>", "<|AUDIO_UNUSE:539|>", "<|AUDIO_UNUSE:540|>", "<|AUDIO_UNUSE:541|>", "<|AUDIO_UNUSE:542|>", "<|AUDIO_UNUSE:543|>", "<|AUDIO_UNUSE:544|>", "<|AUDIO_UNUSE:545|>", "<|AUDIO_UNUSE:546|>", "<|AUDIO_UNUSE:547|>", "<|AUDIO_UNUSE:548|>", "<|AUDIO_UNUSE:549|>", "<|AUDIO_UNUSE:550|>", "<|AUDIO_UNUSE:551|>", "<|AUDIO_UNUSE:552|>", "<|AUDIO_UNUSE:553|>", "<|AUDIO_UNUSE:554|>", "<|AUDIO_UNUSE:555|>", "<|AUDIO_UNUSE:556|>", "<|AUDIO_UNUSE:557|>", "<|AUDIO_UNUSE:558|>", "<|AUDIO_UNUSE:559|>", "<|AUDIO_UNUSE:560|>", "<|AUDIO_UNUSE:561|>", "<|AUDIO_UNUSE:562|>", "<|AUDIO_UNUSE:563|>", "<|AUDIO_UNUSE:564|>", "<|AUDIO_UNUSE:565|>", "<|AUDIO_UNUSE:566|>", "<|AUDIO_UNUSE:567|>", "<|AUDIO_UNUSE:568|>", "<|AUDIO_UNUSE:569|>", "<|AUDIO_UNUSE:570|>", "<|AUDIO_UNUSE:571|>", "<|AUDIO_UNUSE:572|>", "<|AUDIO_UNUSE:573|>", "<|AUDIO_UNUSE:574|>", "<|AUDIO_UNUSE:575|>", "<|AUDIO_UNUSE:576|>", "<|AUDIO_UNUSE:577|>", "<|AUDIO_UNUSE:578|>", "<|AUDIO_UNUSE:579|>", "<|AUDIO_UNUSE:580|>", "<|AUDIO_UNUSE:581|>", "<|AUDIO_UNUSE:582|>", "<|AUDIO_UNUSE:583|>", "<|AUDIO_UNUSE:584|>", "<|AUDIO_UNUSE:585|>", "<|AUDIO_UNUSE:586|>", "<|AUDIO_UNUSE:587|>", "<|AUDIO_UNUSE:588|>", "<|AUDIO_UNUSE:589|>", "<|AUDIO_UNUSE:590|>", "<|AUDIO_UNUSE:591|>", "<|AUDIO_UNUSE:592|>", "<|AUDIO_UNUSE:593|>", "<|AUDIO_UNUSE:594|>", "<|AUDIO_UNUSE:595|>", "<|AUDIO_UNUSE:596|>", "<|AUDIO_UNUSE:597|>", "<|AUDIO_UNUSE:598|>", "<|AUDIO_UNUSE:599|>", "<|AUDIO_UNUSE:600|>", "<|AUDIO_UNUSE:601|>", "<|AUDIO_UNUSE:602|>", "<|AUDIO_UNUSE:603|>", 
"<|AUDIO_UNUSE:604|>", "<|AUDIO_UNUSE:605|>", "<|AUDIO_UNUSE:606|>", "<|AUDIO_UNUSE:607|>", "<|AUDIO_UNUSE:608|>", "<|AUDIO_UNUSE:609|>", "<|AUDIO_UNUSE:610|>", "<|AUDIO_UNUSE:611|>", "<|AUDIO_UNUSE:612|>", "<|AUDIO_UNUSE:613|>", "<|AUDIO_UNUSE:614|>", "<|AUDIO_UNUSE:615|>", "<|AUDIO_UNUSE:616|>", "<|AUDIO_UNUSE:617|>", "<|AUDIO_UNUSE:618|>", "<|AUDIO_UNUSE:619|>", "<|AUDIO_UNUSE:620|>", "<|AUDIO_UNUSE:621|>", "<|AUDIO_UNUSE:622|>", "<|AUDIO_UNUSE:623|>", "<|AUDIO_UNUSE:624|>", "<|AUDIO_UNUSE:625|>", "<|AUDIO_UNUSE:626|>", "<|AUDIO_UNUSE:627|>", "<|AUDIO_UNUSE:628|>", "<|AUDIO_UNUSE:629|>", "<|AUDIO_UNUSE:630|>", "<|AUDIO_UNUSE:631|>", "<|AUDIO_UNUSE:632|>", "<|AUDIO_UNUSE:633|>", "<|AUDIO_UNUSE:634|>", "<|AUDIO_UNUSE:635|>", "<|AUDIO_UNUSE:636|>", "<|AUDIO_UNUSE:637|>", "<|AUDIO_UNUSE:638|>", "<|AUDIO_UNUSE:639|>", "<|AUDIO_UNUSE:640|>", "<|AUDIO_UNUSE:641|>", "<|AUDIO_UNUSE:642|>", "<|AUDIO_UNUSE:643|>", "<|AUDIO_UNUSE:644|>", "<|AUDIO_UNUSE:645|>", "<|AUDIO_UNUSE:646|>", "<|AUDIO_UNUSE:647|>", "<|AUDIO_UNUSE:648|>", "<|AUDIO_UNUSE:649|>", "<|AUDIO_UNUSE:650|>", "<|AUDIO_UNUSE:651|>", "<|AUDIO_UNUSE:652|>", "<|AUDIO_UNUSE:653|>", "<|AUDIO_UNUSE:654|>", "<|AUDIO_UNUSE:655|>", "<|AUDIO_UNUSE:656|>", "<|AUDIO_UNUSE:657|>", "<|AUDIO_UNUSE:658|>", "<|AUDIO_UNUSE:659|>", "<|AUDIO_UNUSE:660|>", "<|AUDIO_UNUSE:661|>", "<|AUDIO_UNUSE:662|>", "<|AUDIO_UNUSE:663|>", "<|AUDIO_UNUSE:664|>", "<|AUDIO_UNUSE:665|>", "<|AUDIO_UNUSE:666|>", "<|AUDIO_UNUSE:667|>", "<|AUDIO_UNUSE:668|>", "<|AUDIO_UNUSE:669|>", "<|AUDIO_UNUSE:670|>", "<|AUDIO_UNUSE:671|>", "<|AUDIO_UNUSE:672|>", "<|AUDIO_UNUSE:673|>", "<|AUDIO_UNUSE:674|>", "<|AUDIO_UNUSE:675|>", "<|AUDIO_UNUSE:676|>", "<|AUDIO_UNUSE:677|>", "<|AUDIO_UNUSE:678|>", "<|AUDIO_UNUSE:679|>", "<|AUDIO_UNUSE:680|>", "<|AUDIO_UNUSE:681|>", "<|AUDIO_UNUSE:682|>", "<|AUDIO_UNUSE:683|>", "<|AUDIO_UNUSE:684|>", "<|AUDIO_UNUSE:685|>", "<|AUDIO_UNUSE:686|>", "<|AUDIO_UNUSE:687|>", "<|AUDIO_UNUSE:688|>", "<|AUDIO_UNUSE:689|>", 
"<|AUDIO_UNUSE:690|>", "<|AUDIO_UNUSE:691|>", "<|AUDIO_UNUSE:692|>", "<|AUDIO_UNUSE:693|>", "<|AUDIO_UNUSE:694|>", "<|AUDIO_UNUSE:695|>", "<|AUDIO_UNUSE:696|>", "<|AUDIO_UNUSE:697|>", "<|AUDIO_UNUSE:698|>", "<|AUDIO_UNUSE:699|>", "<|AUDIO_UNUSE:700|>", "<|AUDIO_UNUSE:701|>", "<|AUDIO_UNUSE:702|>", "<|AUDIO_UNUSE:703|>", "<|AUDIO_UNUSE:704|>", "<|AUDIO_UNUSE:705|>", "<|AUDIO_UNUSE:706|>", "<|AUDIO_UNUSE:707|>", "<|AUDIO_UNUSE:708|>", "<|AUDIO_UNUSE:709|>", "<|AUDIO_UNUSE:710|>", "<|AUDIO_UNUSE:711|>", "<|AUDIO_UNUSE:712|>", "<|AUDIO_UNUSE:713|>", "<|AUDIO_UNUSE:714|>", "<|AUDIO_UNUSE:715|>", "<|AUDIO_UNUSE:716|>", "<|AUDIO_UNUSE:717|>", "<|AUDIO_UNUSE:718|>", "<|AUDIO_UNUSE:719|>", "<|AUDIO_UNUSE:720|>", "<|AUDIO_UNUSE:721|>", "<|AUDIO_UNUSE:722|>", "<|AUDIO_UNUSE:723|>", "<|AUDIO_UNUSE:724|>", "<|AUDIO_UNUSE:725|>", "<|AUDIO_UNUSE:726|>", "<|AUDIO_UNUSE:727|>", "<|AUDIO_UNUSE:728|>", "<|AUDIO_UNUSE:729|>", "<|AUDIO_UNUSE:730|>", "<|AUDIO_UNUSE:731|>", "<|AUDIO_UNUSE:732|>", "<|AUDIO_UNUSE:733|>", "<|AUDIO_UNUSE:734|>", "<|AUDIO_UNUSE:735|>", "<|AUDIO_UNUSE:736|>", "<|AUDIO_UNUSE:737|>", "<|AUDIO_UNUSE:738|>", "<|AUDIO_UNUSE:739|>", "<|AUDIO_UNUSE:740|>", "<|AUDIO_UNUSE:741|>", "<|AUDIO_UNUSE:742|>", "<|AUDIO_UNUSE:743|>", "<|AUDIO_UNUSE:744|>", "<|AUDIO_UNUSE:745|>", "<|AUDIO_UNUSE:746|>", "<|AUDIO_UNUSE:747|>", "<|AUDIO_UNUSE:748|>", "<|AUDIO_UNUSE:749|>", "<|AUDIO_UNUSE:750|>", "<|AUDIO_UNUSE:751|>", "<|AUDIO_UNUSE:752|>", "<|AUDIO_UNUSE:753|>", "<|AUDIO_UNUSE:754|>", "<|AUDIO_UNUSE:755|>", "<|AUDIO_UNUSE:756|>", "<|AUDIO_UNUSE:757|>", "<|AUDIO_UNUSE:758|>", "<|AUDIO_UNUSE:759|>", "<|AUDIO_UNUSE:760|>", "<|AUDIO_UNUSE:761|>", "<|AUDIO_UNUSE:762|>", "<|AUDIO_UNUSE:763|>", "<|AUDIO_UNUSE:764|>", "<|AUDIO_UNUSE:765|>", "<|AUDIO_UNUSE:766|>", "<|AUDIO_UNUSE:767|>", "<|AUDIO_UNUSE:768|>", "<|AUDIO_UNUSE:769|>", "<|AUDIO_UNUSE:770|>", "<|AUDIO_UNUSE:771|>", "<|AUDIO_UNUSE:772|>", "<|AUDIO_UNUSE:773|>", "<|AUDIO_UNUSE:774|>", "<|AUDIO_UNUSE:775|>", 
"<|AUDIO_UNUSE:776|>", "<|AUDIO_UNUSE:777|>", "<|AUDIO_UNUSE:778|>", "<|AUDIO_UNUSE:779|>", "<|AUDIO_UNUSE:780|>", "<|AUDIO_UNUSE:781|>", "<|AUDIO_UNUSE:782|>", "<|AUDIO_UNUSE:783|>", "<|AUDIO_UNUSE:784|>", "<|AUDIO_UNUSE:785|>", "<|AUDIO_UNUSE:786|>", "<|AUDIO_UNUSE:787|>", "<|AUDIO_UNUSE:788|>", "<|AUDIO_UNUSE:789|>", "<|AUDIO_UNUSE:790|>", "<|AUDIO_UNUSE:791|>", "<|AUDIO_UNUSE:792|>", "<|AUDIO_UNUSE:793|>", "<|AUDIO_UNUSE:794|>", "<|AUDIO_UNUSE:795|>", "<|AUDIO_UNUSE:796|>", "<|AUDIO_UNUSE:797|>", "<|AUDIO_UNUSE:798|>", "<|AUDIO_UNUSE:799|>", "<|AUDIO_UNUSE:800|>", "<|AUDIO_UNUSE:801|>", "<|AUDIO_UNUSE:802|>", "<|AUDIO_UNUSE:803|>", "<|AUDIO_UNUSE:804|>", "<|AUDIO_UNUSE:805|>", "<|AUDIO_UNUSE:806|>", "<|AUDIO_UNUSE:807|>", "<|AUDIO_UNUSE:808|>", "<|AUDIO_UNUSE:809|>", "<|AUDIO_UNUSE:810|>", "<|AUDIO_UNUSE:811|>", "<|AUDIO_UNUSE:812|>", "<|AUDIO_UNUSE:813|>", "<|AUDIO_UNUSE:814|>", "<|AUDIO_UNUSE:815|>", "<|AUDIO_UNUSE:816|>", "<|AUDIO_UNUSE:817|>", "<|AUDIO_UNUSE:818|>", "<|AUDIO_UNUSE:819|>", "<|AUDIO_UNUSE:820|>", "<|AUDIO_UNUSE:821|>", "<|AUDIO_UNUSE:822|>", "<|AUDIO_UNUSE:823|>", "<|AUDIO_UNUSE:824|>", "<|AUDIO_UNUSE:825|>", "<|AUDIO_UNUSE:826|>", "<|AUDIO_UNUSE:827|>", "<|AUDIO_UNUSE:828|>", "<|AUDIO_UNUSE:829|>", "<|AUDIO_UNUSE:830|>", "<|AUDIO_UNUSE:831|>", "<|AUDIO_UNUSE:832|>", "<|AUDIO_UNUSE:833|>", "<|AUDIO_UNUSE:834|>", "<|AUDIO_UNUSE:835|>", "<|AUDIO_UNUSE:836|>", "<|AUDIO_UNUSE:837|>", "<|AUDIO_UNUSE:838|>", "<|AUDIO_UNUSE:839|>", "<|AUDIO_UNUSE:840|>", "<|AUDIO_UNUSE:841|>", "<|AUDIO_UNUSE:842|>", "<|AUDIO_UNUSE:843|>", "<|AUDIO_UNUSE:844|>", "<|AUDIO_UNUSE:845|>", "<|AUDIO_UNUSE:846|>", "<|AUDIO_UNUSE:847|>", "<|AUDIO_UNUSE:848|>", "<|AUDIO_UNUSE:849|>", "<|AUDIO_UNUSE:850|>", "<|AUDIO_UNUSE:851|>", "<|AUDIO_UNUSE:852|>", "<|AUDIO_UNUSE:853|>", "<|AUDIO_UNUSE:854|>", "<|AUDIO_UNUSE:855|>", "<|AUDIO_UNUSE:856|>", "<|AUDIO_UNUSE:857|>", "<|AUDIO_UNUSE:858|>", "<|AUDIO_UNUSE:859|>", "<|AUDIO_UNUSE:860|>", "<|AUDIO_UNUSE:861|>", 
"<|AUDIO_UNUSE:862|>", "<|AUDIO_UNUSE:863|>", "<|AUDIO_UNUSE:864|>", "<|AUDIO_UNUSE:865|>", "<|AUDIO_UNUSE:866|>", "<|AUDIO_UNUSE:867|>", "<|AUDIO_UNUSE:868|>", "<|AUDIO_UNUSE:869|>", "<|AUDIO_UNUSE:870|>", "<|AUDIO_UNUSE:871|>", "<|AUDIO_UNUSE:872|>", "<|AUDIO_UNUSE:873|>", "<|AUDIO_UNUSE:874|>", "<|AUDIO_UNUSE:875|>", "<|AUDIO_UNUSE:876|>", "<|AUDIO_UNUSE:877|>", "<|AUDIO_UNUSE:878|>", "<|AUDIO_UNUSE:879|>", "<|AUDIO_UNUSE:880|>", "<|AUDIO_UNUSE:881|>", "<|AUDIO_UNUSE:882|>", "<|AUDIO_UNUSE:883|>", "<|AUDIO_UNUSE:884|>", "<|AUDIO_UNUSE:885|>", "<|AUDIO_UNUSE:886|>", "<|AUDIO_UNUSE:887|>", "<|AUDIO_UNUSE:888|>", "<|AUDIO_UNUSE:889|>", "<|AUDIO_UNUSE:890|>", "<|AUDIO_UNUSE:891|>", "<|AUDIO_UNUSE:892|>", "<|AUDIO_UNUSE:893|>", "<|AUDIO_UNUSE:894|>", "<|AUDIO_UNUSE:895|>", "<|AUDIO_UNUSE:896|>", "<|AUDIO_UNUSE:897|>", "<|AUDIO_UNUSE:898|>", "<|AUDIO_UNUSE:899|>", "<|AUDIO_UNUSE:900|>", "<|AUDIO_UNUSE:901|>", "<|AUDIO_UNUSE:902|>", "<|AUDIO_UNUSE:903|>", "<|AUDIO_UNUSE:904|>", "<|AUDIO_UNUSE:905|>", "<|AUDIO_UNUSE:906|>", "<|AUDIO_UNUSE:907|>", "<|AUDIO_UNUSE:908|>", "<|AUDIO_UNUSE:909|>", "<|AUDIO_UNUSE:910|>", "<|AUDIO_UNUSE:911|>", "<|AUDIO_UNUSE:912|>", "<|AUDIO_UNUSE:913|>", "<|AUDIO_UNUSE:914|>", "<|AUDIO_UNUSE:915|>", "<|AUDIO_UNUSE:916|>", "<|AUDIO_UNUSE:917|>", "<|AUDIO_UNUSE:918|>", "<|AUDIO_UNUSE:919|>", "<|AUDIO_UNUSE:920|>", "<|AUDIO_UNUSE:921|>", "<|AUDIO_UNUSE:922|>", "<|AUDIO_UNUSE:923|>", "<|AUDIO_UNUSE:924|>", "<|AUDIO_UNUSE:925|>", "<|AUDIO_UNUSE:926|>", "<|AUDIO_UNUSE:927|>", "<|AUDIO_UNUSE:928|>", "<|AUDIO_UNUSE:929|>", "<|AUDIO_UNUSE:930|>", "<|AUDIO_UNUSE:931|>", "<|AUDIO_UNUSE:932|>", "<|AUDIO_UNUSE:933|>", "<|AUDIO_UNUSE:934|>", "<|AUDIO_UNUSE:935|>", "<|AUDIO_UNUSE:936|>", "<|AUDIO_UNUSE:937|>", "<|AUDIO_UNUSE:938|>", "<|AUDIO_UNUSE:939|>", "<|AUDIO_UNUSE:940|>", "<|AUDIO_UNUSE:941|>", "<|AUDIO_UNUSE:942|>", "<|AUDIO_UNUSE:943|>", "<|AUDIO_UNUSE:944|>", "<|AUDIO_UNUSE:945|>", "<|AUDIO_UNUSE:946|>", "<|AUDIO_UNUSE:947|>", 
"<|AUDIO_UNUSE:948|>", "<|AUDIO_UNUSE:949|>", "<|AUDIO_UNUSE:950|>", "<|AUDIO_UNUSE:951|>", "<|AUDIO_UNUSE:952|>", "<|AUDIO_UNUSE:953|>", "<|AUDIO_UNUSE:954|>", "<|AUDIO_UNUSE:955|>", "<|AUDIO_UNUSE:956|>", "<|AUDIO_UNUSE:957|>", "<|AUDIO_UNUSE:958|>", "<|AUDIO_UNUSE:959|>", "<|AUDIO_UNUSE:960|>", "<|AUDIO_UNUSE:961|>", "<|AUDIO_UNUSE:962|>", "<|AUDIO_UNUSE:963|>", "<|AUDIO_UNUSE:964|>", "<|AUDIO_UNUSE:965|>", "<|AUDIO_UNUSE:966|>", "<|AUDIO_UNUSE:967|>", "<|AUDIO_UNUSE:968|>", "<|AUDIO_UNUSE:969|>", "<|AUDIO_UNUSE:970|>", "<|AUDIO_UNUSE:971|>", "<|AUDIO_UNUSE:972|>", "<|AUDIO_UNUSE:973|>", "<|AUDIO_UNUSE:974|>", "<|AUDIO_UNUSE:975|>", "<|AUDIO_UNUSE:976|>", "<|AUDIO_UNUSE:977|>", "<|AUDIO_UNUSE:978|>", "<|AUDIO_UNUSE:979|>", "<|AUDIO_UNUSE:980|>", "<|AUDIO_UNUSE:981|>", "<|AUDIO_UNUSE:982|>", "<|AUDIO_UNUSE:983|>", "<|AUDIO_UNUSE:984|>", "<|AUDIO_UNUSE:985|>", "<|AUDIO_UNUSE:986|>", "<|AUDIO_UNUSE:987|>", "<|AUDIO_UNUSE:988|>", "<|AUDIO_UNUSE:989|>", "<|AUDIO_UNUSE:990|>", "<|AUDIO_UNUSE:991|>", "<|AUDIO_UNUSE:992|>", "<|AUDIO_UNUSE:993|>", "<|AUDIO_UNUSE:994|>", "<|AUDIO_UNUSE:995|>", "<|AUDIO_UNUSE:996|>", "<|AUDIO_UNUSE:997|>", "<|AUDIO_UNUSE:998|>", "<|AUDIO_UNUSE:999|>", "<|AUDIO_UNUSE:1000|>", "<|AUDIO_UNUSE:1001|>", "<|AUDIO_UNUSE:1002|>", "<|AUDIO_UNUSE:1003|>", "<|AUDIO_UNUSE:1004|>", "<|AUDIO_UNUSE:1005|>", "<|AUDIO_UNUSE:1006|>", "<|AUDIO_UNUSE:1007|>", "<|AUDIO_UNUSE:1008|>", "<|AUDIO_UNUSE:1009|>", "<|AUDIO_UNUSE:1010|>", "<|AUDIO_UNUSE:1011|>", "<|AUDIO_UNUSE:1012|>", "<|AUDIO_UNUSE:1013|>", "<|AUDIO_UNUSE:1014|>", "<|AUDIO_UNUSE:1015|>", "<|AUDIO_UNUSE:1016|>", "<|AUDIO_UNUSE:1017|>", "<|AUDIO_UNUSE:1018|>", "<|AUDIO_UNUSE:1019|>", "<|AUDIO_UNUSE:1020|>", "", ""]} diff --git a/tokenization_ernie_45t_vl.py b/tokenization_ernie_45t_vl.py new file mode 100644 index 0000000000000000000000000000000000000000..d9f34e3eb4364fb4cc269db93cd30db0310f241f --- /dev/null +++ b/tokenization_ernie_45t_vl.py @@ -0,0 +1,321 @@ +# Copyright (c) 2025 Baidu, Inc. 
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tokenization classes for Ernie_45T_VL."""

import os
import re
from shutil import copyfile
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
import torch
import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import (
    PaddingStrategy,
    TextInput,
)
from transformers.utils import logging


logger = logging.get_logger(__name__)


class Ernie4_5_VLTokenizer(PreTrainedTokenizer):
    """
    SentencePiece-backed tokenizer for Ernie 4.5 VL.

    Tokenization and id/piece conversion are delegated to a
    ``sentencepiece.SentencePieceProcessor`` loaded from ``tokenizer.model``.
    Padding is customised in :meth:`_pad` so that an attention mask supplied
    as (or defaulting to) a 2-D lower-triangular matrix is padded on both of
    its sequence axes.

    NOTE(review): several special-token literals in this copy of the file are
    empty strings (e.g. the ``bos_token`` default and the pieces looked up by
    ``space_token_id`` / ``gend_token_id``). They are reproduced unchanged
    here; confirm them against the upstream tokenizer before relying on them.
    """

    vocab_files_names = {
        "vocab_file": "tokenizer.model",
    }
    # Model input names expected by the tokenizer
    model_input_names = ["input_ids", "position_ids", "attention_mask", "labels"]
    # Padding side (where to add padding tokens)
    padding_side = "right"

    def __init__(
        self,
        vocab_file,
        bos_token="",
        cls_token="",
        eos_token="",
        mask_token="",
        pad_token="",
        sep_token="",
        unk_token="",
        additional_special_tokens=None,
        **kwargs,
    ):
        """
        Initialize the Ernie4_5_VLTokenizer

        Args:
            vocab_file (str): Path to the tokenizer vocabulary model.
            bos_token (str, optional): The beginning of sequence token. Defaults to `""`.
            cls_token (str, optional): The classifier token. Defaults to `""`.
            eos_token (str, optional): The end of sequence token. Defaults to `""`.
            mask_token (str, optional): The masking token. Defaults to `""`.
            pad_token (str, optional): The padding token. Defaults to `""`.
            sep_token (str, optional): The separation token. Defaults to `""`.
            unk_token (str, optional): The unknown tokens symbol. Defaults to `""`.
            additional_special_tokens (List[str], optional): Additional special tokens to use.
                Defaults to `["", ""]`.
            **kwargs (dict): Additional keyword arguments passed along to the superclass.
        """
        # The SentencePiece model is loaded before super().__init__() runs;
        # presumably the base constructor already queries the vocabulary, so
        # keep this ordering.
        self.vocab_file = vocab_file
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(vocab_file)

        # Set default additional special tokens if none provided
        if additional_special_tokens is None:
            additional_special_tokens = ["", ""]
        super().__init__(
            bos_token=bos_token,
            cls_token=cls_token,
            eos_token=eos_token,
            mask_token=mask_token,
            pad_token=pad_token,
            sep_token=sep_token,
            unk_token=unk_token,
            additional_special_tokens=additional_special_tokens,
            **kwargs,
        )

    @property
    def space_token(self):
        """Return the space token"""
        return ""

    @property
    def space_token_id(self):
        """Return the ID of the space token"""
        return self.sp_model.piece_to_id("")

    @property
    def gend_token(self):
        """Return the "gend" token (its meaning is not shown in this file)"""
        return ""

    @property
    def gend_token_id(self):
        """Return the ID of the "gend" token"""
        return self.sp_model.piece_to_id("")

    @property
    def im_start_id(self):
        """Return the SentencePiece ID of the ``<|im_start|>`` piece"""
        return self.sp_model.piece_to_id("<|im_start|>")

    @property
    def im_end_id(self):
        """Return the SentencePiece ID of the ``<|im_end|>`` piece"""
        return self.sp_model.piece_to_id("<|im_end|>")

    @property
    def vocab_size(self):
        """Return the size of the SentencePiece vocabulary (added tokens excluded)"""
        return self.sp_model.vocab_size()

    def get_vocab(self):
        """Return a token -> ID dict covering the base vocabulary plus added tokens"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Split ``text`` into SentencePiece pieces"""
        return self.sp_model.encode_as_pieces(text)

    def _convert_token_to_id(self, token):
        """Convert a piece to its SentencePiece ID"""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, id):
        """Convert a SentencePiece ID to its piece"""
        return self.sp_model.id_to_piece(id)

    def convert_tokens_to_string(self, tokens):
        """
        Convert a sequence of tokens back to a string.

        Runs of ordinary pieces are decoded together by SentencePiece; special
        tokens are emitted verbatim between those runs.
        """
        # Built once: `all_special_tokens` is recomputed on every access.
        special_tokens = set(self.all_special_tokens)
        pending_pieces = []
        chunks = []

        for token in tokens:
            if token in special_tokens:
                chunks.append(self.sp_model.decode(pending_pieces))
                chunks.append(token)
                pending_pieces = []
            else:
                pending_pieces.append(token)

        # Flush whatever ordinary pieces follow the last special token.
        chunks.append(self.sp_model.decode(pending_pieces))
        return "".join(chunks)

    def prepare_for_model(self, *args, **kwargs):
        """Prepare inputs for the model, discarding any ``add_special_tokens`` argument"""
        # Remove add_special_tokens if present (not supported)
        kwargs.pop("add_special_tokens", None)
        return super().prepare_for_model(*args, **kwargs)

    def save_vocabulary(
        self, save_directory, filename_prefix: Optional[str] = None
    ) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`): The directory to save the vocabulary to
            filename_prefix (`str`, optional): Prefix to add to the filename

        Returns:
            `Tuple(str)`: Paths to the saved files. If ``save_directory`` is
            not an existing directory, an error is logged and ``None`` is
            returned instead.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return

        # Construct output vocabulary file path
        out_vocab_file = os.path.join(
            save_directory,
            (filename_prefix + "-" if filename_prefix else "")
            + self.vocab_files_names["vocab_file"],
        )

        source_exists = os.path.isfile(self.vocab_file)
        same_file = os.path.abspath(self.vocab_file) == os.path.abspath(out_vocab_file)
        if source_exists and not same_file:
            copyfile(self.vocab_file, out_vocab_file)
        elif not source_exists:
            # The original model file is gone: re-serialise the loaded model.
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def _decode(self, *args, **kwargs):
        """
        Decode token ids back to text.

        ``clean_up_tokenization_spaces`` and ``spaces_between_special_tokens``
        are always forced to ``False``; values passed by the caller are ignored.
        """
        kwargs.pop("clean_up_tokenization_spaces", None)
        kwargs.pop("spaces_between_special_tokens", None)

        return super()._decode(
            *args,
            **kwargs,
            clean_up_tokenization_spaces=False,
            spaces_between_special_tokens=False,
        )

    def _pad(
        self,
        encoded_inputs: Dict,
        max_length: Optional[int] = None,
        padding_strategy=PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
        padding_side: Optional[str] = None,
    ) -> dict:
        """
        Pad the encoded inputs to the specified length.

        The attention mask is handled here rather than by the parent class.
        When none is supplied, a lower-triangular matrix of ones with a
        leading axis of size 1 is created. A 1-D mask is padded along its only
        axis; any other mask is padded along its last two axes, which assumes
        a 3-D mask. The remaining fields are padded by the parent ``_pad``.

        Args:
            encoded_inputs (Dict): Encoded fields of a single example.
            max_length (int, optional): Target length.
            padding_strategy: A ``PaddingStrategy`` member.
            pad_to_multiple_of (int, optional): Round ``max_length`` up to a
                multiple of this value.
            return_attention_mask (bool, optional): Whether to emit
                ``attention_mask``. Defaults to whether ``"attention_mask"``
                is listed in ``model_input_names``.
            padding_side (str, optional): ``"right"`` or ``"left"``; falls
                back to ``self.padding_side``. Accepted so that callers which
                pass this keyword do not fail with ``TypeError``.

        Returns:
            dict: The padded inputs, with ``attention_mask`` as nested lists
            when requested.

        Raises:
            ValueError: If the attention mask has an unsupported type or the
                padding side is neither ``"right"`` nor ``"left"``.
        """
        if return_attention_mask is None:
            return_attention_mask = "attention_mask" in self.model_input_names
        side = self.padding_side if padding_side is None else padding_side

        if return_attention_mask:
            required_input = encoded_inputs[self.model_input_names[0]]
            seq_len = len(required_input)
            if padding_strategy == PaddingStrategy.LONGEST:
                max_length = seq_len

            # Adjust max_length if needed for multiple of padding
            if (
                max_length is not None
                and pad_to_multiple_of is not None
                and (max_length % pad_to_multiple_of != 0)
            ):
                max_length = (
                    (max_length // pad_to_multiple_of) + 1
                ) * pad_to_multiple_of

            needs_to_be_padded = (
                padding_strategy != PaddingStrategy.DO_NOT_PAD
                and seq_len != max_length
            )

            if (
                "attention_mask" in encoded_inputs
                and encoded_inputs["attention_mask"] is not None
            ):
                # Popped so the parent never sees (or pads) the mask itself.
                attention_mask = encoded_inputs.pop("attention_mask")
                if isinstance(attention_mask, torch.Tensor):
                    # detach()/cpu() keep the conversion valid for tensors that
                    # require grad or live on an accelerator.
                    attention_mask = attention_mask.detach().cpu().numpy()
                elif isinstance(attention_mask, list):
                    attention_mask = np.array(attention_mask)
                elif not isinstance(attention_mask, np.ndarray):
                    raise ValueError(
                        f"Unexpected type {type(attention_mask)} of attention_mask, "
                    )
            else:
                # Create default attention mask if none provided
                attention_mask = np.tril(
                    np.ones((seq_len, seq_len), dtype=np.int64)
                )
                attention_mask = np.expand_dims(attention_mask, axis=0)

            if needs_to_be_padded:
                difference = max_length - seq_len
                if side == "right":
                    edge = (0, difference)
                elif side == "left":
                    edge = (difference, 0)
                else:
                    raise ValueError("Invalid padding side: " + str(side))

                if attention_mask.ndim == 1:
                    pad_width = [edge]
                else:
                    pad_width = [(0, 0), edge, edge]

                attention_mask = np.pad(
                    attention_mask,
                    pad_width=pad_width,
                    mode="constant",
                    constant_values=0,
                )

        # Forward padding_side only when the caller supplied it, so a parent
        # `_pad` without that parameter keeps working.
        parent_kwargs = {}
        if padding_side is not None:
            parent_kwargs["padding_side"] = padding_side
        encoded_inputs = super()._pad(
            encoded_inputs,
            max_length,
            padding_strategy=padding_strategy,
            pad_to_multiple_of=pad_to_multiple_of,
            return_attention_mask=False,
            **parent_kwargs,
        )

        if return_attention_mask:
            encoded_inputs["attention_mask"] = attention_mask.tolist()

        return encoded_inputs


__all__ = ["Ernie4_5_VLTokenizer"]
endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- if message.role == 'system' -%}\n {{- '\n' -}}\n {%- endif -%}\n {%- elif message.role == 'assistant' -%}\n {%- macro extract_text_content(content_field) -%}\n {%- if content_field is string -%}\n {{- content_field -}}\n {%- elif content_field is iterable and content_field is not string -%}\n {%- set ns = namespace(text_parts=[]) -%}\n {%- set text_parts = [] -%}\n {%- for item in content_field -%}\n {%- if item.type == 'text' -%}\n {%- set ns.text_parts = ns.text_parts + [item.text] -%}\n {%- endif -%}\n {%- endfor -%}\n {{- ns.text_parts | join('') -}}\n {%- else -%}\n {{- '' -}}\n {%- endif -%}\n {%- endmacro -%}\n {%- set reasoning_content = extract_text_content(message.reasoning_content) -%}\n {%- set content = extract_text_content(message.content) -%}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}\n {%- set content = content.split('')[-1].lstrip('\n') %}\n {%- endif %}\n {%- if reasoning_content %}\n {{- '\n' + 'Assistant: ' + '\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }}\n {%- else %}\n {{- '\n' + 'Assistant: ' + content }}\n {%- endif %}\n {{- '<|end_of_sentence|>' }}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt is not defined or add_generation_prompt is true %}\n {{- '\nAssistant: ' -}}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\n\n\n\n' }}\n {%- endif %}\n {%- if enable_thinking is not defined or enable_thinking is true %}\n {{- '' }}\n {%- endif %}\n{%- endif %}\n" +} diff --git a/video_utils_ernie_45t_vl.py b/video_utils_ernie_45t_vl.py new file mode 100644 index 0000000000000000000000000000000000000000..83c3e0a60089787cb6dbc122ce97bca1820e9a09 --- /dev/null +++ b/video_utils_ernie_45t_vl.py @@ -0,0 +1,514 @@ +# Copyright (c) 2025 Baidu, Inc. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import os
import random
import requests
import base64
import datetime
import hashlib
import threading
import time
import uuid
import decord

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from PIL.ExifTags import TAGS
from pathlib import Path
from tempfile import NamedTemporaryFile as ntf

try:
    # moviepy 1.0
    import moviepy.editor as mp
except ImportError:
    # moviepy 2.0
    import moviepy as mp

from transformers.utils import logging


logger = logging.get_logger(__name__)

RAW_VIDEO_DIR = "./download_tmp/raw_video/"
RAW_IMAGE_DIR = "./download_tmp/raw_images/"
EXTRACTED_FRAME_DIR = "./download_tmp/extracted_frames/"
TMP_DIR = "./download_tmp/upload_tmp/"

FONT_PATH = os.path.join(Path(__file__).parent.absolute(), "Roboto-Regular.ttf")


def is_gif(data: bytes) -> bool:
    """
    Return True when ``data`` starts with a GIF magic header
    (``GIF87a`` or ``GIF89a``).
    """
    return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper(decord.VideoReader):
    """
    ``decord.VideoReader`` that rewinds after every access and accepts GIFs.

    Solving memory leak bug

    https://github.com/dmlc/decord/issues/208

    GIF input (a ``.gif`` path, GIF bytes, or a ``BytesIO`` holding a GIF) is
    converted to a temporary MP4 with moviepy before decord opens it.

    NOTE(review): ``__del__`` deletes ``self.original_file``. For a string
    path that is not a GIF, that is the caller's own file, which is only safe
    if the path always points at a disposable download. Confirm with callers.
    """

    def __init__(self, video_path, *args, **kwargs):
        """
        Open ``video_path``, converting GIF input to a temporary MP4 first.

        Args:
            video_path: A file path (``str``), raw ``bytes`` or ``io.BytesIO``.
            *args, **kwargs: Forwarded to ``decord.VideoReader``.
        """
        with ntf(delete=True, suffix=".gif") as gif_file:
            gif_input = None
            self.original_file = None
            if isinstance(video_path, str):
                self.original_file = video_path
                if video_path.lower().endswith(".gif"):
                    gif_input = video_path
            elif isinstance(video_path, bytes):
                if is_gif(video_path):
                    gif_file.write(video_path)
                    gif_input = gif_file.name
            elif isinstance(video_path, io.BytesIO):
                video_path.seek(0)
                tmp_bytes = video_path.read()
                video_path.seek(0)
                if is_gif(tmp_bytes):
                    gif_file.write(tmp_bytes)
                    gif_input = gif_file.name

            if gif_input is not None:
                # moviepy reopens the temp file by name, so buffered bytes
                # must reach the disk before it reads them.
                gif_file.flush()
                mp4_file = ntf(delete=False, suffix=".mp4")
                # Only the name is needed; moviepy writes the file itself.
                mp4_file.close()
                clip = mp.VideoFileClip(gif_input)
                try:
                    try:
                        clip.write_videofile(
                            mp4_file.name, verbose=False, logger=None
                        )
                    except TypeError:
                        # NOTE(review): moviepy 2.x appears to have dropped
                        # the `verbose` argument; retry without it. Confirm
                        # against the installed moviepy version.
                        clip.write_videofile(mp4_file.name, logger=None)
                finally:
                    clip.close()
                video_path = mp4_file.name
                self.original_file = video_path

        super().__init__(video_path, *args, **kwargs)
        self.seek(0)

    def __getitem__(self, key):
        """Return the requested frame(s), then rewind the reader to frame 0."""
        frames = super().__getitem__(key)
        self.seek(0)
        return frames

    def __del__(self):
        """Run the parent finalizer if there is one, then delete ``original_file``."""
        try:
            # NOTE(review): decord.VideoReader is believed to release its
            # native handle in __del__; overriding without delegating would
            # skip that. getattr keeps this safe if the parent defines none.
            parent_del = getattr(super(), "__del__", None)
            if parent_del is not None:
                parent_del()
        finally:
            # getattr: __init__ may have failed before the attribute was set.
            path = getattr(self, "original_file", None)
            if path and os.path.exists(path):
                os.remove(path)


def get_filename(url=None):
    """
    Build a file name (without extension) for a download.

    Returns a random 32-character hex string when ``url`` is ``None``.
    Otherwise returns ``YYYY-MM-DD-<pid>-<thread id>-<md5 of url>``, where
    ``url`` may be ``str`` (encoded as UTF-8) or ``bytes``.
    """
    if url is None:
        return uuid.uuid4().hex
    now = datetime.datetime.now()
    if not isinstance(url, bytes):
        url = url.encode("utf-8")

    md5_hash = hashlib.md5(url).hexdigest()
    pid = os.getpid()
    tid = threading.get_ident()

    # Remove the suffix to prevent save-jpg from reporting errors
    return f"{now.year}-{now.month:02d}-{now.day:02d}-{pid}-{tid}-{md5_hash}"


def _http_get_with_retry(url, retry=0, retry_interval=3):
    """Fetch ``url`` and return the body, retrying up to ``retry`` extra times."""
    attempts = max(int(retry), 0) + 1
    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url)
            # An HTTP error page must not be handed on as media bytes.
            response.raise_for_status()
            return response.content
        except requests.RequestException as err:
            if attempt == attempts:
                raise
            logger.warning(
                f"download failed ({err}); retry {attempt}/{attempts - 1} "
                f"in {retry_interval}s"
            )
            time.sleep(retry_interval)


def file_download(url, download_dir, save_to_disk=False, retry=0, retry_interval=3):
    """
    Resolve ``url`` to media bytes or to a path on disk.

    Args:
        url: One of
            * ``PIL.Image.Image`` or ``VideoReaderWrapper``: returned as is;
            * ``bytes`` / ``bytearray``: used directly as the media bytes;
            * ``str`` starting with ``"http"``: downloaded;
            * ``str`` naming an existing file: read from disk;
            * any other ``str``: decoded as base64.
        download_dir: Directory the data is written to when ``save_to_disk``
            is true.
        save_to_disk: Return a path instead of bytes. An existing local file
            is returned as its own path without copying.
        retry: Extra HTTP attempts after the first one fails.
        retry_interval: Seconds to sleep between HTTP attempts.

    Returns:
        The input object, the media ``bytes``, or the saved file path.

    Raises:
        requests.RequestException: If every HTTP attempt fails or the server
            answers with an error status.
    """
    if isinstance(url, (Image.Image, VideoReaderWrapper)):
        return url

    if isinstance(url, (bytes, bytearray)):
        # Normalised to bytes so get_filename() below can hash it.
        url = bytes(url)
        bytes_data = url
    elif url.startswith("http"):
        bytes_data = _http_get_with_retry(url, retry, retry_interval)
    elif os.path.isfile(url):
        if save_to_disk:
            return url
        with open(url, "rb") as f:
            bytes_data = f.read()
    else:
        bytes_data = base64.b64decode(url)

    if not save_to_disk:
        return bytes_data

    download_path = os.path.join(download_dir, get_filename(url))
    Path(download_path).parent.mkdir(parents=True, exist_ok=True)
    with open(download_path, "wb") as f:
        f.write(bytes_data)
    return download_path


def get_downloadable(
    url, download_dir=RAW_VIDEO_DIR, save_to_disk=False, retry=0, retry_interval=3
):
    """download video and store it in the disk

    return downloaded **path** if save_to_disk is set to true
    return downloaded **bytes** if save_to_disk is set to false

    ``download_dir`` is created whether or not ``save_to_disk`` is set.
    """
    # exist_ok avoids a failure when another thread or process creates the
    # directory between a check and the creation.
    os.makedirs(download_dir, exist_ok=True)
    return file_download(
        url,
        download_dir,
        save_to_disk=save_to_disk,
        retry=retry,
        retry_interval=retry_interval,
    )


def get_downloadable_image(
    download_path, need_exif_info, retry_max_time=0, retry_interval=3
):
    """
    Load an image, normalise it to RGB and optionally collect its EXIF data.

    Args:
        download_path: Anything accepted by ``get_downloadable``.
        need_exif_info: Whether to read EXIF metadata.
        retry_max_time: Passed to ``get_downloadable`` as ``retry``.
        retry_interval: Seconds between download attempts.

    Returns:
        ``(image, exif_info)``: the image converted to RGB, and a dict of EXIF
        tag name to value, empty when EXIF was not requested or unreadable.
    """

    def get_image_exif(image):
        """Map EXIF tag names to values, stripping string and bytes values."""
        exif_data = image._getexif()
        exif_info = {}
        if exif_data is not None:
            for tag, value in exif_data.items():
                tag_name = TAGS.get(tag, tag)
                # Only str/bytes have strip(); other values are kept as they
                # are, so one of them no longer discards the whole EXIF dict.
                if isinstance(value, (str, bytes)):
                    value = value.strip()
                exif_info[tag_name] = value
        return exif_info

    def has_transparent_background(img):
        """Return True if the image has at least one pixel with alpha below 255."""
        if img.mode in ("RGBA", "LA") or (
            img.mode == "P" and "transparency" in img.info
        ):
            # Check for any pixel with alpha channel less than 255 (fully opaque)
            alpha = img.convert("RGBA").split()[-1]
            return alpha.getextrema()[0] < 255
        return False

    def add_white_background(img):
        """
        Add a white background to a transparent background image
        """
        if img.mode != "RGBA":
            img = img.convert("RGBA")
        # Create an image with a white background and the same size as the original image
        img_white_background = Image.new("RGBA", img.size, (255, 255, 255))

        # Paste the original image onto a white background
        img_white_background.paste(img, (0, 0), img)

        return img_white_background

    def change_I16_to_L(img):
        """
        Convert image from I;16 mode to L mode
        """
        # Since the point function in I mode only supports addition, subtraction, and multiplication,
        # the following * (1 / 256) cannot be changed to division.
        return img.point(lambda i: i * (1 / 256)).convert("L")

    image = get_downloadable(
        download_path,
        save_to_disk=False,
        retry=retry_max_time,
        retry_interval=retry_interval,
    )
    if isinstance(image, Image.Image):
        pil_image = image
    else:
        pil_image = Image.open(io.BytesIO(image))

    exif_info = {}
    if need_exif_info:
        try:
            exif_info = get_image_exif(pil_image)
        except Exception as why:
            # Best effort: not every image exposes EXIF data.
            logger.debug(f"could not read EXIF info: {why}")
            exif_info = {}

    try:
        if pil_image.mode == "I;16":
            pil_image = change_I16_to_L(pil_image)
        if has_transparent_background(pil_image):
            pil_image = add_white_background(pil_image)
    except Exception as e:
        # Best effort: fall through to the plain RGB conversion below.
        logger.debug(f"image normalisation skipped: {e}")

    return pil_image.convert("RGB"), exif_info


def read_video_decord(video_path, save_to_disk):
    """
    Open a video with decord and collect basic metadata.

    Args:
        video_path: Anything accepted by ``get_downloadable``, or an existing
            ``VideoReaderWrapper``.
        save_to_disk: Forwarded to ``get_downloadable``.

    Returns:
        ``(video_reader, video_meta, video_path)`` where ``video_meta`` holds
        ``fps``, ``duration`` (frame count divided by fps) and
        ``num_of_frame``, and ``video_path`` is the resolved input (a reader,
        a path, or a ``BytesIO`` wrapping downloaded bytes).
    """
    video_path = get_downloadable(video_path, save_to_disk=save_to_disk)
    if isinstance(video_path, VideoReaderWrapper):
        video_reader = video_path
    else:
        if isinstance(video_path, bytes):
            video_path = io.BytesIO(video_path)
        video_reader = VideoReaderWrapper(video_path, num_threads=1)
    vlen = len(video_reader)
    fps = video_reader.get_avg_fps()
    duration = vlen / float(fps)

    video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen}

    return video_reader, video_meta, video_path
+ ) + else: + acc_samples = target_frames + logger.debug( + f"sampling at target_frames={target_frames}, frames_sample={frames_sample}" + ) + + # split the video into `acc_samples` intervals, and sample from each interval. + intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int) + ranges = [] + for idx, interv in enumerate(intervals[:-1]): + ranges.append((interv, intervals[idx + 1] - 1)) + if frames_sample == "rand": + try: + frame_indices = [random.choice(range(x[0], x[1])) for x in ranges] + except Exception as e: + frame_indices = np.random.permutation(vlen)[:acc_samples] + frame_indices.sort() + frame_indices = list(frame_indices) + elif fix_start is not None: + frame_indices = [x[0] + fix_start for x in ranges] + elif frames_sample == "leading": + frame_indices = [x[0] for x in ranges] + elif frames_sample == "middle": + frame_indices = [(x[0] + x[1]) // 2 for x in ranges] + else: + raise NotImplementedError + + elif target_fps > 0: + assert ( + target_frames <= 0 + ), "target_frames must be negative if target_fps is given." + assert input_fps > 0, "input_fps must be provided if target_fps is given." + logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}") + duration = float(vlen) / input_fps + delta = ( + 1 / target_fps + ) # gap between frames, this is also the clip length each frame represents + if frames_sample == "middle": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + elif frames_sample == "leading": + frame_seconds = np.arange(0, duration, delta) + if frames_sample == "rand": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5 + frame_seconds += rand_offset * delta + frame_indices = np.around(frame_seconds * input_fps).astype(int) + frame_indices = [e for e in frame_indices if e < vlen] + + else: + raise ValueError( + "Must provide either positive target_fps or positive target_frames." 
+ ) + + return frame_indices + + +def read_frames_decord( + video_path, + video_reader, + video_meta, + target_frames=-1, + target_fps=-1, + frames_sample="middle", + fix_start=None, + save_to_disk=False, + cache_dir=EXTRACTED_FRAME_DIR, + frame_indices=None, + tol=10, +): + """get frames by decord""" + + if frame_indices is None: + frame_indices = get_frame_indices( + video_meta["num_of_frame"], + target_frames=target_frames, + target_fps=target_fps, + frames_sample=frames_sample, + fix_start=fix_start, + input_fps=video_meta["fps"], + ) + + frames = [] + for frame_indice_index in range(0, len(frame_indices)): + frame_indice = frame_indices[frame_indice_index] + try: + frames.append(video_reader[frame_indice].asnumpy()) # (T, H, W, C) + except Exception as e: + logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}") + previous_counter = 1 + later_counter = 1 + previous_after_flag = True + if frame_indice == 0 or frame_indice == len(video_reader) - 1: + cur_tol = tol * 2 + else: + cur_tol = tol + while previous_counter < cur_tol or later_counter < cur_tol: + if previous_after_flag: + if frame_indice - previous_counter < 0: + previous_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append( + video_reader[frame_indice - previous_counter].asnumpy() + ) + logger.info( + f"replace {frame_indice}-th frame with {frame_indice-previous_counter}-th frame" + ) + frame_indices[frame_indice_index] = ( + frame_indice - previous_counter + ) + break + except Exception as e: + previous_counter += 1 + else: + if frame_indice + later_counter >= len(video_reader): + later_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append( + video_reader[frame_indice + later_counter].asnumpy() + ) + logger.info( + f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame" + ) + frame_indices[frame_indice_index] = frame_indice + later_counter + break + except Exception as e: + 
later_counter += 1 + previous_after_flag = not previous_after_flag + + frames = np.stack(frames, axis=0) + assert len(frames) == len( + frame_indices + ), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}" + + ret = [] + + url_sha1 = get_filename() + for idx, frame in enumerate(frames): + tmp = Image.fromarray(frame, "RGB") + if save_to_disk: + save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png") + if not os.path.exists(os.path.dirname(save_path)): + os.makedirs(os.path.dirname(save_path)) + tmp.save(save_path) + tmp = save_path + ret.append(tmp) + + time_stamps = [ + frame_idx * video_meta["duration"] / video_meta["num_of_frame"] + for frame_idx in frame_indices + ] + + return ret, frame_indices, time_stamps + + +def render_single_image_with_timestamp( + image: Image, number: str, rate: float, font_path: str = FONT_PATH +): + """ + Function: Renders a timestamp to the image of pil.image + The timestamp size is the rate of min(width, height) + The font color is black, the outline is white, and the outline size is 10% of the font + Returns an Image object + """ + draw = ImageDraw.Draw(image) + width, height = image.size + font_size = int(min(width, height) * rate) + outline_size = int(font_size * 0.1) + font = ImageFont.truetype(font_path, font_size) + x = 0 + y = 0 + + # Draw a black timestamp with a white border + draw.text( + (x, y), + number, + font=font, + fill=(0, 0, 0), + stroke_width=outline_size, + stroke_fill=(255, 255, 255), + ) + + return image + + +def timestamp_converting(time_stamp_in_seconds): + """ + convert timestamp format from seconds to hr:min:sec + """ + # get hours + hours = 0 + while time_stamp_in_seconds >= 3600: + hours += 1 + time_stamp_in_seconds -= 3600 + # get minutes + mins = 0 + while time_stamp_in_seconds >= 60: + mins += 1 + time_stamp_in_seconds -= 60 + time_hours = f"{int(hours):02d}" + time_mins = f"{int(mins):02d}" + time_secs = f"{time_stamp_in_seconds:05.02f}" + fi_time_stamp = time_hours + 
":" + time_mins + ":" + time_secs + + return fi_time_stamp + + +def render_frame_timestamp(frame, timestamp, font_rate=0.1): + """ + Function, given a frame, render the index in order + Logic: render the index to the upper left corner of the image + frame: frame, PIL.Image object + timestamp: timestamp, in seconds + font_rate: the ratio of font size to min(wi, hei) + """ + time_stamp = "time: " + timestamp_converting(timestamp) + new_frame = render_single_image_with_timestamp(frame, time_stamp, font_rate) + + return new_frame