diff --git a/config.json b/config.json index 96bd3e960c6d6f314e39d370577ec209c6eff61c..030a5c3190a61129bf36ad2c250410a834dbd8bc 100644 --- a/config.json +++ b/config.json @@ -25,11 +25,200 @@ "output_router_logits": false, "qkv_bias": false, "quantization_config": { - "autoround_version": "0.6.1", + "autoround_version": "0.6.1.dev", "bits": 4, "data_type": "int", "dataset": "github-code-clean", + "extra_config": { + "model.layers.0.mlp.gate": { + "bits": 16 + }, + "model.layers.1.mlp.gate": { + "bits": 16 + }, + "model.layers.10.mlp.gate": { + "bits": 16 + }, + "model.layers.11.mlp.gate": { + "bits": 16 + }, + "model.layers.12.mlp.gate": { + "bits": 16 + }, + "model.layers.13.mlp.gate": { + "bits": 16 + }, + "model.layers.14.mlp.gate": { + "bits": 16 + }, + "model.layers.15.mlp.gate": { + "bits": 16 + }, + "model.layers.16.mlp.gate": { + "bits": 16 + }, + "model.layers.17.mlp.gate": { + "bits": 16 + }, + "model.layers.18.mlp.gate": { + "bits": 16 + }, + "model.layers.19.mlp.gate": { + "bits": 16 + }, + "model.layers.2.mlp.gate": { + "bits": 16 + }, + "model.layers.20.mlp.gate": { + "bits": 16 + }, + "model.layers.21.mlp.gate": { + "bits": 16 + }, + "model.layers.22.mlp.gate": { + "bits": 16 + }, + "model.layers.23.mlp.gate": { + "bits": 16 + }, + "model.layers.24.mlp.gate": { + "bits": 16 + }, + "model.layers.25.mlp.gate": { + "bits": 16 + }, + "model.layers.26.mlp.gate": { + "bits": 16 + }, + "model.layers.27.mlp.gate": { + "bits": 16 + }, + "model.layers.28.mlp.gate": { + "bits": 16 + }, + "model.layers.29.mlp.gate": { + "bits": 16 + }, + "model.layers.3.mlp.gate": { + "bits": 16 + }, + "model.layers.30.mlp.gate": { + "bits": 16 + }, + "model.layers.31.mlp.gate": { + "bits": 16 + }, + "model.layers.32.mlp.gate": { + "bits": 16 + }, + "model.layers.33.mlp.gate": { + "bits": 16 + }, + "model.layers.34.mlp.gate": { + "bits": 16 + }, + "model.layers.35.mlp.gate": { + "bits": 16 + }, + "model.layers.36.mlp.gate": { + "bits": 16 + }, + "model.layers.37.mlp.gate": { + "bits": 16 + }, + "model.layers.38.mlp.gate": { + "bits": 16 + }, + "model.layers.39.mlp.gate": { + "bits": 16 + }, + "model.layers.4.mlp.gate": { + "bits": 16 + }, + "model.layers.40.mlp.gate": { + "bits": 16 + }, + "model.layers.41.mlp.gate": { + "bits": 16 + }, + "model.layers.42.mlp.gate": { + "bits": 16 + }, + "model.layers.43.mlp.gate": { + "bits": 16 + }, + "model.layers.44.mlp.gate": { + "bits": 16 + }, + "model.layers.45.mlp.gate": { + "bits": 16 + }, + "model.layers.46.mlp.gate": { + "bits": 16 + }, + "model.layers.47.mlp.gate": { + "bits": 16 + }, + "model.layers.48.mlp.gate": { + "bits": 16 + }, + "model.layers.49.mlp.gate": { + "bits": 16 + }, + "model.layers.5.mlp.gate": { + "bits": 16 + }, + "model.layers.50.mlp.gate": { + "bits": 16 + }, + "model.layers.51.mlp.gate": { + "bits": 16 + }, + "model.layers.52.mlp.gate": { + "bits": 16 + }, + "model.layers.53.mlp.gate": { + "bits": 16 + }, + "model.layers.54.mlp.gate": { + "bits": 16 + }, + "model.layers.55.mlp.gate": { + "bits": 16 + }, + "model.layers.56.mlp.gate": { + "bits": 16 + }, + "model.layers.57.mlp.gate": { + "bits": 16 + }, + "model.layers.58.mlp.gate": { + "bits": 16 + }, + "model.layers.59.mlp.gate": { + "bits": 16 + }, + "model.layers.6.mlp.gate": { + "bits": 16 + }, + "model.layers.60.mlp.gate": { + "bits": 16 + }, + "model.layers.61.mlp.gate": { + "bits": 16 + }, + "model.layers.7.mlp.gate": { + "bits": 16 + }, + "model.layers.8.mlp.gate": { + "bits": 16 + }, + "model.layers.9.mlp.gate": { + "bits": 16 + } + }, "group_size": 128, + "low_gpu_mem_usage": true, "nsamples": 512, "packing_format": "auto_round:auto_gptq", "quant_method": "auto-round", diff --git a/model-00001-of-00051.safetensors b/model-00001-of-00051.safetensors index 36272c9e584295533e3149c49a54bc011e97a5ac..dbfb0600ad74fa79280305ce0beb04e3c23a2934 100644 --- a/model-00001-of-00051.safetensors +++ b/model-00001-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74443a6c73c7d623b94123217f4de3e6a84592fb386c07c0aa7a8ffc61a63a6a -size 4992427456 +oid sha256:b2fcd389bed63b92c1b1aa5fcb49b832c056d7a8adf767e8f4fb32893c39788f +size 4993882608 diff --git a/model-00002-of-00051.safetensors b/model-00002-of-00051.safetensors index eab8e2157546bf2954078ba5dfb1e200c03ee2ee..0c0489203d890bc89e45096ae892a9b3433ac00a 100644 --- a/model-00002-of-00051.safetensors +++ b/model-00002-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fbe665bee4c71e98e581ffdfd9def0cffe8ce18d000dd7271ced80ea1707276 -size 4992455920 +oid sha256:a33b80aef046a74c06686a77e461e8cd6415c4d116920b22a33024a02e9ff89d +size 4995366224 diff --git a/model-00003-of-00051.safetensors b/model-00003-of-00051.safetensors index 9968f9defc247f4c13210f5efcf56b30ba5740b6..cc3e5f1f6846f1c03c686fae63bc080d98cd5214 100644 --- a/model-00003-of-00051.safetensors +++ b/model-00003-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:552c3197fe36f189326a9b0b278c1c9f5d82560b6eb448ae5a036f851e136c81 -size 4996824496 +oid sha256:1ff7c399897db37a99840f61365451b0dfb5b530bd97d6c960e8beaf0c01a17d +size 4998279648 diff --git a/model-00004-of-00051.safetensors b/model-00004-of-00051.safetensors index 21f9b06047dc887d29697e70d76039aadce0ecb7..20027b39d51e24762e93706a7a37106860e79c63 100644 --- a/model-00004-of-00051.safetensors +++ b/model-00004-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4affba3e95b63821c6d7ea19a3cf4863b2ce819c0dd162f2282e3ccee852d526 -size 4996824576 +oid sha256:ce12f6214414259ae65af3d305845d06f2a0936632f08bd09834b15057c4139d +size 4998279728 diff --git a/model-00005-of-00051.safetensors b/model-00005-of-00051.safetensors index b80d0abbc618b95b7fffcd4090c8ae56bede4d3c..45ee4e4e5f9e2e230b34ffb140e3caaec01dd37e 100644 --- a/model-00005-of-00051.safetensors +++ b/model-00005-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db080c72ad69ea17fff72910b96b855cb6a3a76f89a292134932815bfd815a81 -size 4996824768 +oid sha256:50772007d01d510759090056922489000ccd8d8e726737af3bf5b513fa31a61c +size 4998279920 diff --git a/model-00006-of-00051.safetensors b/model-00006-of-00051.safetensors index ee8f272db85f29412bf9b6893650666031aace7e..4e40d9a76dce6982d85a31ff7eef19cf2080267d 100644 --- a/model-00006-of-00051.safetensors +++ b/model-00006-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f24e4502941a45de33abc006ac6e88c0f07e739e62b77a0016d944794218cdf2 -size 4993579960 +oid sha256:be4eb50b79d02364f1994cd02ac534bfedfe6ecbfab3a144e525c6ac0c46f4b9 +size 4995035112 diff --git a/model-00007-of-00051.safetensors b/model-00007-of-00051.safetensors index 7a7529355092f2410e774379dd9d823108b67103..e080977d94aeea5ebb5db09fd7338ce20c56140c 100644 --- a/model-00007-of-00051.safetensors +++ b/model-00007-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3bb47990add7e36be3745f5830120954979e3097dd678171569733cb18a1c64 -size 4995700464 +oid sha256:1741212257c176dbbc0a0ae136607c1e9cdc37e8a432a9e0ffd229945eff18b3 +size 4998610768 diff --git a/model-00008-of-00051.safetensors b/model-00008-of-00051.safetensors index 3b74806f38e5504bd2ecb2adcdc791f6524d7816..731a2a05b3bc709a7345e521b9be51721e48318e 100644 --- a/model-00008-of-00051.safetensors +++ b/model-00008-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b017fe3e314decd8a66990622b829ce1e14b5d07bd37266dad28b2393036cedb -size 4996824576 +oid sha256:ac80a01f5f27c85ddcbad8fb338423069764518d45db3ac022e17ad12ebfcc55 +size 4998279728 diff --git a/model-00009-of-00051.safetensors b/model-00009-of-00051.safetensors index 9c0ad21aa7a2eba2defdbf122ac210b36e06fcc9..440024152d36d4a43c350130c6e398c4a76c12e6 100644 --- a/model-00009-of-00051.safetensors +++ b/model-00009-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef2d458a97ac05b4b023d4b9d395a7d8d03bd1953a112827a8c1dca19dbe9064 -size 4996825840 +oid sha256:816957b3fe770e12f1b53b620a387f5b6ee24566c9a08ad3c3f14f2ef72e1848 +size 4998280992 diff --git a/model-00010-of-00051.safetensors b/model-00010-of-00051.safetensors index 3f6f789269fabe47fc7d7e91b0ed5103303c7f79..f24e3341350402e7860e910db9b19619d0800219 100644 --- a/model-00010-of-00051.safetensors +++ b/model-00010-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9fb264e6105a1fddfe8b4826b19bda1d5f1347871ed01c2eb0f049428ef1a18e -size 4996826752 +oid sha256:f0aa050bc69cd6ab834285e968d3e5779ea2790d35ba5fbb2f6bff3663be51dd +size 4998281904 diff --git a/model-00011-of-00051.safetensors b/model-00011-of-00051.safetensors index eae20170d42319a5126a67a19e9cb68fbaccfce0..909daf95f031fb0137297f3161b0850182cd54b9 100644 --- a/model-00011-of-00051.safetensors +++ b/model-00011-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b392ab7231eb316ef4e177a5c8561e9a055add6d95dfb202969c5937a190168 -size 4992457328 +oid sha256:083c5c19ba42b9629d1e285144501f83fb94882a23a9b139524f047e60dfc74c +size 4995367624 diff --git a/model-00012-of-00051.safetensors b/model-00012-of-00051.safetensors index c57635440c6edd0e8dd1b115af680476f9c8b319..69590d2c14adbfac9ae8450e33cead693dea8056 100644 --- a/model-00012-of-00051.safetensors +++ b/model-00012-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:304618c0987fb6684a63e68191c790186077b615d8aa1a87d35caaff0772c84d -size 4996826392 +oid sha256:f609f8715b6b097edf7020a4c5f7eea07827ee38dd5a70c0efa0b143a1e92379 +size 4998281544 diff --git a/model-00013-of-00051.safetensors b/model-00013-of-00051.safetensors index e589916713d0e9647e04bed63c4daf85c5ba7622..9210ed715d51741d6e19b225b57f92212ecea57c 100644 --- a/model-00013-of-00051.safetensors +++ b/model-00013-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc7540bef0efd2d64de1fb4a072443bd1936f5f7479b59a8ac475ee8dfac986e -size 4996826552 +oid sha256:43de6719111133bf65c1194db77e6c79bf38732e3cbddb0a32fbae0996c5f57b +size 4998281704 diff --git a/model-00014-of-00051.safetensors b/model-00014-of-00051.safetensors index 34c74f10ad260fe1dc4bbafa7bd77b0fc8e2d067..711f0a7e1d9c84d57bf0f6adaf158783f02bcdba 100644 --- a/model-00014-of-00051.safetensors +++ b/model-00014-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b97914e50414818c2b4bda70ba47d2e51e8703d16edbb982fb4ec271b503b610 -size 4996826760 +oid sha256:f49d222d4d496b7ca8d00407baf229c76840ea6805c72f02ec8687675adbe31b +size 4998281912 diff --git a/model-00015-of-00051.safetensors b/model-00015-of-00051.safetensors index 5e2ed7a9cba94e162491feaf4f113db3d198340f..dd9b32cb0eec276476f135b4c871d20e7b15d90b 100644 --- a/model-00015-of-00051.safetensors +++ b/model-00015-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49fafc3f5a0dd737e1c9248efd402880f057b3f98fa6e20c39431375ad144c66 -size 4992457344 +oid sha256:3587f617e406574d1cf30ed4a5e60d21b1b687ef4c5c877388a37275e74db48b +size 4995367640 diff --git a/model-00016-of-00051.safetensors b/model-00016-of-00051.safetensors index ecc4826b94079fed8bc04749759e856888497561..9edd5d63d133b70147fdc3db0dc035b9d550d6e4 100644 --- a/model-00016-of-00051.safetensors +++ b/model-00016-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b7887a899e8ee0e57d7d71206dbc4a9c1beae420cac67bbde7e9c838e1da131 -size 4996826400 +oid sha256:c0975d3615ca1190f7c87dbf6d7e9c6b56a18ef568b1380eafcff91c6475f876 +size 4998281552 diff --git a/model-00017-of-00051.safetensors b/model-00017-of-00051.safetensors index 0e743fe36abbaaa2db9498e7f35a62f293a2e52f..7c8751a97ea4820d9378d08ea9068b51436526d1 100644 --- a/model-00017-of-00051.safetensors +++ b/model-00017-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7defa94a4b614318ae67d49beb2e7bcab5c8fbc4892d9508e0c12ba3f2e400df -size 4996826528 +oid sha256:32672be8a64d8bdbaec55b4f84d5d9e2c93575db1569a93d04e6a9316f30df0d +size 4998281672 diff --git a/model-00018-of-00051.safetensors b/model-00018-of-00051.safetensors index b6fc17f62a83867eae072c874ae365c0bb0a6d0c..c0fffc84e4a68fc6b4f6abcf5952724e39f37e0d 100644 --- a/model-00018-of-00051.safetensors +++ b/model-00018-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e472b6d5666827efe1200e3c2bdf6226ac4a4a751d3f12d186e200deae066e7b -size 4996826760 +oid sha256:633c76dd915b4e82d2f4efda934de3cd3cd40237069e72796f2fdb3640324c50 +size 4998281912 diff --git a/model-00019-of-00051.safetensors b/model-00019-of-00051.safetensors index 4a99988679fcaff8dce5a6ac24f3e544aedb61fd..571ba8c24768d2683bfa033fd26799af845dbb4c 100644 --- a/model-00019-of-00051.safetensors +++ b/model-00019-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4aa49a58c8512ea80034b8b81cb933df2c6d3271643787e826c1eb72239bbd1 -size 4992457360 +oid sha256:3e1f929548091e3f9ad95eb77d6b0ea8d35107fe5b71c45e5f1faeff8e76e7b3 +size 4995367664 diff --git a/model-00020-of-00051.safetensors b/model-00020-of-00051.safetensors index 94b50f46e6c464368770252beb01f8a616f3b76c..6a2737fa9302b6eddede3acf40f874520ea91954 100644 --- a/model-00020-of-00051.safetensors +++ b/model-00020-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72a6de676e8d17950e11e3f994c2e575af30352a73ca2f0296398c693ce2d2b9 -size 4996826400 +oid sha256:cee89e08d46853b2ccf9dfea33b6760d9af3adf1f374c62738962c4456ccb5c9 +size 4998281552 diff --git a/model-00021-of-00051.safetensors b/model-00021-of-00051.safetensors index 09e571eae324ed4314228572479c90f40a8d92a8..d334958361f6922f7b2f8bb13524ad2f6db6b7bc 100644 --- a/model-00021-of-00051.safetensors +++ b/model-00021-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa02bd4983e07e22dd514544a3e40b4d8bcc2e483c4986586bc38c5c88ff67eb -size 4996826512 +oid sha256:5637ebd0c556cb97a9937f5233d84990e40fdb5561127ca80be679453663c8e2 +size 4998281656 diff --git a/model-00022-of-00051.safetensors b/model-00022-of-00051.safetensors index 87adfaf3dd91d203eaa023971a7d5bfeafb1502f..16e50c740ccba8db858dfa6b908e563dddeeb276 100644 --- a/model-00022-of-00051.safetensors +++ b/model-00022-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ae4f232a63aeb0958b431c8dbe1cba473a1156a054e9da6bdde887a323fb2ca -size 4996826752 +oid sha256:535cdcbf20d633cdedbe9efd9824307becb3a5c8ddda6fbcf12f5aba7b9b849a +size 4998281904 diff --git a/model-00023-of-00051.safetensors b/model-00023-of-00051.safetensors index 9db39dc7c074eb4782bab5cda6487d4f007b7911..c8fc302dbc3f669cdbfcbaa029c8324b22c9c5b6 100644 --- a/model-00023-of-00051.safetensors +++ b/model-00023-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c432d8fb89a63e19e106d9c679c686c0acec347032fc199a0c1943fb31fb561a -size 4992457392 +oid sha256:7318d3731a8092ec3bf6c96652e4c96548d75a7d40da00db217abbdbb65fd80e +size 4995367688 diff --git a/model-00024-of-00051.safetensors b/model-00024-of-00051.safetensors index cecd9cd747f8b8d06af1d55a95f02b433dceaa35..61a5ae018a8b28b8ef1ab77a6322093bd01f66f6 100644 --- a/model-00024-of-00051.safetensors +++ b/model-00024-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d98512de2f774c931783a3c1b1136f27197eaf227eef7cea37a2aa4f7294aa87 -size 4996826392 +oid sha256:1188bb12c51f7e234bde19a08c4418cf9d8ddce3ad1f9eecf8d2a4b12398c5dc +size 4998281544 diff --git a/model-00025-of-00051.safetensors b/model-00025-of-00051.safetensors index befc5dac8f4bb66e5840c064f35b1b49987eb885..fd2b00cacd636227f037e7c3c06b4b1ef2f040f1 100644 --- a/model-00025-of-00051.safetensors +++ b/model-00025-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb74573ba3964a3cb96b80cc867407effafa6b07ad0a4007d92586cf915c87c7 -size 4996826488 +oid sha256:d3a04d9a9209224e25a50b2836ea2ce15029ffebb746c05f64ddb4996b5721e6 +size 4998281640 diff --git a/model-00026-of-00051.safetensors b/model-00026-of-00051.safetensors index 44e0b725777fb0c7f3cbda05f609201f3f0c2c85..dcc5d81ce6aa96cc1040505b61c6d8c266deacd6 100644 --- a/model-00026-of-00051.safetensors +++ b/model-00026-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5f929d12b6997a3c04f4f1798b0cba413229e081cc468ba6b56a3e5b55489e7 -size 4996826760 +oid sha256:864681e20af1c6ea01124fc6f3cf39b7e2a0c62d397537abdd6f0647723aab15 +size 4998281912 diff --git a/model-00027-of-00051.safetensors b/model-00027-of-00051.safetensors index 0cecca8b4bce9ac653bb740210c545e0502c41c7..008fe65f4da43c6c1009a9da403acf2f63fd4153 100644 --- a/model-00027-of-00051.safetensors +++ b/model-00027-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63cbb0d014d14a4373b9c6635eeba64c02f51c949d053c3282a832effaa95236 -size 4992457400 +oid sha256:f5f6219dde36ae3a4fab26f3a994f4ed7ffc12fae533dd8afa2e86652f299ed9 +size 4995367704 diff --git a/model-00028-of-00051.safetensors b/model-00028-of-00051.safetensors index fd7791c50fa027cca3f9bebbc4adab018f2eff81..0ed36a9b9a3a126d950ca104a2fc4024cd48188c 100644 --- a/model-00028-of-00051.safetensors +++ b/model-00028-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c698ea7baad409a8100d83ac002c5301aeeccd39d15d0a56abb88d5012e14668 -size 4996826400 +oid sha256:fcb0ff578823814685411e63e5bb0773be697cb0eb85b389bbac0a4c753e4cd5 +size 4998281552 diff --git a/model-00029-of-00051.safetensors b/model-00029-of-00051.safetensors index e4eab11e21a4df9205a29b9b19a815977e7837a0..f8882a1165e7053a99fb278caf737a6f014cb8ae 100644 --- a/model-00029-of-00051.safetensors +++ b/model-00029-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46931c424a2f884cee6894af92e36efcb10fd97e2140c8f22d9d5c2ce4df094c -size 4996826464 +oid sha256:a938b8b9f3739b4f5d819e17519a507cbb0666f84aff2f1a5eea720db164ed01 +size 4998281608 diff --git a/model-00030-of-00051.safetensors b/model-00030-of-00051.safetensors index 974f6c38829f975b77bedfd0cc1bb1bd34806031..ad4bbe372714b63c55686e62001ddf1821417469 100644 --- a/model-00030-of-00051.safetensors +++ b/model-00030-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:713ba8d17b2da177f4238bcb9566d139baf7046acc737ac866f77d2680ecbcd3 -size 4996826760 +oid sha256:d0ac78422c41e88a05a992263756159c7e06a0ff7f14a3ab4d5fd9d62456a30c +size 4998281912 diff --git a/model-00031-of-00051.safetensors b/model-00031-of-00051.safetensors index 570c44eaeb65e76f879cebefe53c72d885b62085..5463ddfdaf6a4c0733afc44633aaea88a89cc683 100644 --- a/model-00031-of-00051.safetensors +++ b/model-00031-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97b2edb59e4cb5e5264fe91f31a94314654dc00952404099054dcabd5dd2be28 -size 4992457424 +oid sha256:d0c88e585ea4a34ff759dfbc658e40223c2ecdadbb2b0cc9c5f1b7e5b2946879 +size 4995367728 diff --git a/model-00032-of-00051.safetensors b/model-00032-of-00051.safetensors index 2c1a3934202f8766904ae125e908d7b600df4efe..e721362801a17dbdbf6105350ea6d8962b2ae649 100644 --- a/model-00032-of-00051.safetensors +++ b/model-00032-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a458e26f7505ba58d0fe0c78d3fb273ef99691a1c688287e96baac52a24d0854 -size 4996826400 +oid sha256:7c3ce759d66f3e387da9416d1b4953d31dc05d0128c8d08c25ccb1d627951a1d +size 4998281552 diff --git a/model-00033-of-00051.safetensors b/model-00033-of-00051.safetensors index d3fe9846324646d747c00f9a62d1dd25ab1fe9cc..83cf5a23a7d3d76b8a7ee76eec19c8551bc64011 100644 --- a/model-00033-of-00051.safetensors +++ b/model-00033-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d021f800ef87434086ef1e1f5708afb8949c563f8162edcfb9517ac223ad0a7c -size 4996826448 +oid sha256:011cfdf8dd8bbbe49ad86d73accd70fedbb4786a814885378eea41a975c1476c +size 4998281600 diff --git a/model-00034-of-00051.safetensors b/model-00034-of-00051.safetensors index 706e6a9adb6ccac90097f467f6f19c23a960b091..5b7408ef9d45f97483d5ce7092540bd273c49730 100644 --- a/model-00034-of-00051.safetensors +++ b/model-00034-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9053b962f08ce21f3417dba7a9a10c22a7e4834f907cd2ca81fd09d9dfac4513 -size 4996826752 +oid sha256:0d7a1644aa1bac146551dafc17ff80bc3677aa45696b244213cc416610148cd4 +size 4998281904 diff --git a/model-00035-of-00051.safetensors b/model-00035-of-00051.safetensors index 5a6d68cee99de66b25c266319a1dc80fc7e6c991..29814e4572ca674035eff55f838661a7943d8c02 100644 --- a/model-00035-of-00051.safetensors +++ b/model-00035-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69ce1d49cca51575202ae962ac3b7bc4f0a9b661880a403835c56b423d8cce28 -size 4992457448 +oid sha256:220a43dbd83eb3de157a2a0360b6d39fc0b1db36a0fef7f864525e4f8e6ebb4d +size 4995367752 diff --git a/model-00036-of-00051.safetensors b/model-00036-of-00051.safetensors index 129571327ea6e7412679c7e35d88719f2ea4f116..b27e38780f5b71b2c509fa19ef2ead3e8f2a522e 100644 --- a/model-00036-of-00051.safetensors +++ b/model-00036-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:764d12d6cc9be6411a2aa096b40be0631cdf95ccce7ed97e9123d79a8d92c7de -size 4996826392 +oid sha256:cf6f07fc70ba47a7782a756567477cf3ff706cceea5fae3f881c664a29accd39 +size 4998281544 diff --git a/model-00037-of-00051.safetensors b/model-00037-of-00051.safetensors index bfd774adfbc72b20618b2de361f12cb6b6fda817..da91fa484fa6bade0910f60cbcc8587023f156cd 100644 --- a/model-00037-of-00051.safetensors +++ b/model-00037-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d59b386d3943bdbf7086608f3c6adcc18a1b0f17a5933529727d46d9c116efbb -size 4996826424 +oid sha256:6e324a3e9f32d8a23a7239256e9e82cc32fd6d7f91e0b43451eea9c41252e475 +size 4998281576 diff --git a/model-00038-of-00051.safetensors b/model-00038-of-00051.safetensors index 76082b596fb884114c875153a869e2b96b19976a..7eca8c9ad3e0b397b9ea360d59adb8cb4a1666d0 100644 --- a/model-00038-of-00051.safetensors +++ b/model-00038-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb955d54e1370baf463c7bfee530824eb1456e46909b49aac38e8fa9fad0ea16 -size 4996826760 +oid sha256:60a7950bf7c0d8f85b83de3c2261575e40bd685de3f4f0d066c0f8eb80ebbcfb +size 4998281912 diff --git a/model-00039-of-00051.safetensors b/model-00039-of-00051.safetensors index 6ffec22afb18ec1ed34bfd8ea727c78ea65f1ff6..1639e8496c3d6546140095dd29cca002ba785707 100644 --- a/model-00039-of-00051.safetensors +++ b/model-00039-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:320bed348e37d4a94a4877ce7dd54889f3f80f74d0baecf29de2fbbd5dbedee7 -size 4992457464 +oid sha256:845e9568d0aacc325d0adfa1a48f8c1da89d5992f3b34da689e982e445127b15 +size 4995367768 diff --git a/model-00040-of-00051.safetensors b/model-00040-of-00051.safetensors index 505c2fb8477645b52132f52540bfaa83b3705720..ff14f40879ba00d268af097ef7b2b7a3aa465d3d 100644 --- a/model-00040-of-00051.safetensors +++ b/model-00040-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:694102fb3db45705ccef8f2626aa36c44d127f200a98ddaca6577da900cc2441 -size 4996826400 +oid sha256:1c12be512814edece73bfed76257cea12779ce05147ba47e0d1a4e259b38bb2b +size 4998281552 diff --git a/model-00041-of-00051.safetensors b/model-00041-of-00051.safetensors index 06d3ed727238ed5aadebf5112940f82a3aca3b0d..1bda87f07e99a13e1697fb49d8211ca16c5bc8ad 100644 --- a/model-00041-of-00051.safetensors +++ b/model-00041-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec30fc52234122655f88ef96a419603e23787be2f839f78caa14ae533571bb3c -size 4996826400 +oid sha256:69d6e89b2a8299f0e91493026dafaf96993fbbf2d884314629c13aee60089d9b +size 4998281552 diff --git a/model-00042-of-00051.safetensors b/model-00042-of-00051.safetensors index 2158e2a0dbefe6c973a7c42b70bba0a58126021f..c17e30017cf9d2f91f82a151499ff3d6d563d1a5 100644 --- a/model-00042-of-00051.safetensors +++ b/model-00042-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e86333aad1126a1591d45bc2cc7c7b0fd4b176c407326e243de88363caf2e02 -size 4996826760 +oid sha256:17f8b6daef8e5d3d925b45691f411c547311a50b0ef76c745fabe6f37a5cb685 +size 4998281912 diff --git a/model-00043-of-00051.safetensors b/model-00043-of-00051.safetensors index c005ab0be629db225b1f378d20149696b6fe332e..449e0e265463cb8d02fbd7b0806e1dfc4746d8a1 100644 --- a/model-00043-of-00051.safetensors +++ b/model-00043-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:463e09a39241959a987391a785717f74eed815b51824028e5d1658d4462bc6fd -size 4992457488 +oid sha256:c05ffaa28d26ea938078232fac93e44b4cc5c5bfa28695239911413212571860 +size 4995367784 diff --git a/model-00044-of-00051.safetensors b/model-00044-of-00051.safetensors index 90bda1bba3b5dd93b676fb0ddd218fe80a4e3dae..e69ea24854a4147bf04807c985826d503896c017 100644 --- a/model-00044-of-00051.safetensors +++ b/model-00044-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41516352937a716a17d7f278cfdbcccb856337632abbaea615630e906e375ad5 -size 4996826400 +oid sha256:40e7f08ecd5747ecb4ad4f022221d787cdc3c4215bbf891b68772f2679d5e575 +size 4998281552 diff --git a/model-00045-of-00051.safetensors b/model-00045-of-00051.safetensors index 29b6ec1255ae218a63a440eb205ed485df325e93..a5972c3dbf29bbe2af940fd9aed09296a740a752 100644 --- a/model-00045-of-00051.safetensors +++ b/model-00045-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de8872992479bd5f54cc0096291cb9303f2eda0a4ff51ae7a745bf0fa669d263 -size 4996826400 +oid sha256:6303d8b2503625b1ae68086200eaa23eea32b0278ce4e9df7e47016ec2f564d9 +size 4998281552 diff --git a/model-00046-of-00051.safetensors b/model-00046-of-00051.safetensors index 8af1021463e81b1293d19cb463cc9c589eb381aa..d942c7ea6054853ebe2b74690d1c64827b5f7610 100644 --- a/model-00046-of-00051.safetensors +++ b/model-00046-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fd6fde4a42d298b2d8453ccebaf9273cecbe413256aeefac629200dbfbb12b3 -size 4996826744 +oid sha256:561d4300985b35c2783e22b59d0ed2ca4ceac11ebd7879583f2fee9b140a2c5e +size 4998281888 diff --git a/model-00047-of-00051.safetensors b/model-00047-of-00051.safetensors index dd1a8ed3ca66efcc87303e9a40584750e8313bc7..7efb7bf19f2a6718f9b7fd52c72718f79b56962f 100644 --- a/model-00047-of-00051.safetensors +++ b/model-00047-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fce7a23253e9d24f1c4ef48df8ca49fa26a36991c5f274197d6c42f83c5b2217 -size 4992457512 +oid sha256:9c472909efd503b616f034446ab102993bd4941ce19fb9f25355adcd01c60c56 +size 4995367816 diff --git a/model-00048-of-00051.safetensors b/model-00048-of-00051.safetensors index 1d918cb9400d62fc34bc726e5a80af342aa83fd9..44f919ec49895a625afe8cf5de697714bfb18e8e 100644 --- a/model-00048-of-00051.safetensors +++ b/model-00048-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:979fcdfa2286aad83ef67fa95edc5e7f5d1a2c2e605e13baea6ca2549a583a9c -size 4996826392 +oid sha256:6b610a149d36c1111bc618302e2f6762cb68cb4695187e6e34dc9a6fc1c64221 +size 4998281544 diff --git a/model-00049-of-00051.safetensors b/model-00049-of-00051.safetensors index 627899a5a4427519bc1f22f4924920c85e9d0478..6690312b5df04d4491c455272beea264e24d98ea 100644 --- a/model-00049-of-00051.safetensors +++ b/model-00049-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c47f07c77cfd9b11216f1f164fa20ce833f0ffe687cec7c0639e415a828ed868 -size 4996826400 +oid sha256:8f48e8a103db8452783c19e5a9132923cc9b51eebcb1246f28c4ab97445d3e50 +size 4998281552 diff --git a/model-00050-of-00051.safetensors b/model-00050-of-00051.safetensors index 96e1e7231f86a80c76edb92ea6ff2abc8be0ac13..c9e0c9667f9ece6bb880dddbd57037600b943c20 100644 --- a/model-00050-of-00051.safetensors +++ b/model-00050-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f8dcefa0679c99715f87aa11a3afca0a2bfa4c3b83b52b8fefaa4e473568710 -size 4996826728 +oid sha256:5a5295a08b0c0a4f3e6334d04304889fffcd73cf0634db91f9ccbd5140a9ac6f +size 4998281872 diff --git a/model-00051-of-00051.safetensors b/model-00051-of-00051.safetensors index 939d05a919e27f9c0e83c02f78b83c48ec07cd53..eca1057f9e4f27f951462e81e1ae33a073cef01e 100644 --- a/model-00051-of-00051.safetensors +++ b/model-00051-of-00051.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:796aa05e1cb613cc9e199c9a735f208aaa972a52e3e27ab43e3525de89d785e7 +oid sha256:9f39634fcf05fdc94de0e4622416b7d0e6e2c6d147459bbeba0eefceeeb00b3b size 2447230808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 9c69cf46fa2aec960a81642f8445cf317352d522..7a0657435462b1543b77f1f150b390125b2dc8a7 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,7 +1,7 @@ { "metadata": { - "total_parameters": 1867773440, - "total_size": 252220642816 + "total_parameters": 1928721920, + "total_size": 252310875136 }, "weight_map": { "lm_head.weight": "model-00051-of-00051.safetensors", @@ -1447,9 +1447,7 @@ "model.layers.0.mlp.experts.99.up_proj.qweight": "model-00001-of-00051.safetensors", "model.layers.0.mlp.experts.99.up_proj.qzeros": "model-00001-of-00051.safetensors", "model.layers.0.mlp.experts.99.up_proj.scales": "model-00001-of-00051.safetensors", - "model.layers.0.mlp.gate.qweight": "model-00001-of-00051.safetensors", - "model.layers.0.mlp.gate.qzeros": "model-00001-of-00051.safetensors", - "model.layers.0.mlp.gate.scales": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.gate.weight": "model-00001-of-00051.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00051.safetensors", "model.layers.0.self_attn.k_proj.qweight": "model-00001-of-00051.safetensors", @@ -2906,9 +2904,7 @@ "model.layers.1.mlp.experts.99.up_proj.qweight": "model-00002-of-00051.safetensors", "model.layers.1.mlp.experts.99.up_proj.qzeros": "model-00002-of-00051.safetensors", "model.layers.1.mlp.experts.99.up_proj.scales": "model-00002-of-00051.safetensors", - "model.layers.1.mlp.gate.qweight": "model-00002-of-00051.safetensors", - "model.layers.1.mlp.gate.qzeros": "model-00002-of-00051.safetensors", - "model.layers.1.mlp.gate.scales": "model-00002-of-00051.safetensors", + "model.layers.1.mlp.gate.weight": "model-00002-of-00051.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.k_norm.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.k_proj.qweight": "model-00002-of-00051.safetensors", @@ -4365,9 +4361,7 @@ "model.layers.10.mlp.experts.99.up_proj.qweight": "model-00009-of-00051.safetensors", "model.layers.10.mlp.experts.99.up_proj.qzeros": "model-00009-of-00051.safetensors", "model.layers.10.mlp.experts.99.up_proj.scales": "model-00009-of-00051.safetensors", - "model.layers.10.mlp.gate.qweight": "model-00009-of-00051.safetensors", - "model.layers.10.mlp.gate.qzeros": "model-00009-of-00051.safetensors", - "model.layers.10.mlp.gate.scales": "model-00009-of-00051.safetensors", + "model.layers.10.mlp.gate.weight": "model-00009-of-00051.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", "model.layers.10.self_attn.k_norm.weight": "model-00009-of-00051.safetensors", "model.layers.10.self_attn.k_proj.qweight": "model-00009-of-00051.safetensors", @@ -5824,9 +5818,7 @@ "model.layers.11.mlp.experts.99.up_proj.qweight": "model-00010-of-00051.safetensors", "model.layers.11.mlp.experts.99.up_proj.qzeros": "model-00010-of-00051.safetensors", "model.layers.11.mlp.experts.99.up_proj.scales": "model-00010-of-00051.safetensors", - "model.layers.11.mlp.gate.qweight": "model-00010-of-00051.safetensors", - "model.layers.11.mlp.gate.qzeros": "model-00010-of-00051.safetensors", - "model.layers.11.mlp.gate.scales": "model-00010-of-00051.safetensors", + "model.layers.11.mlp.gate.weight": "model-00010-of-00051.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", "model.layers.11.self_attn.k_norm.weight": "model-00010-of-00051.safetensors", "model.layers.11.self_attn.k_proj.qweight": "model-00010-of-00051.safetensors", @@ -7283,9 +7275,7 @@ "model.layers.12.mlp.experts.99.up_proj.qweight": "model-00011-of-00051.safetensors", "model.layers.12.mlp.experts.99.up_proj.qzeros": "model-00011-of-00051.safetensors", "model.layers.12.mlp.experts.99.up_proj.scales": "model-00011-of-00051.safetensors", - "model.layers.12.mlp.gate.qweight": "model-00011-of-00051.safetensors", - "model.layers.12.mlp.gate.qzeros": "model-00011-of-00051.safetensors", - "model.layers.12.mlp.gate.scales": "model-00011-of-00051.safetensors", + "model.layers.12.mlp.gate.weight": "model-00011-of-00051.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", "model.layers.12.self_attn.k_norm.weight": "model-00011-of-00051.safetensors", "model.layers.12.self_attn.k_proj.qweight": "model-00011-of-00051.safetensors", @@ -8742,9 +8732,7 @@ "model.layers.13.mlp.experts.99.up_proj.qweight": "model-00012-of-00051.safetensors", "model.layers.13.mlp.experts.99.up_proj.qzeros": "model-00012-of-00051.safetensors", "model.layers.13.mlp.experts.99.up_proj.scales": "model-00012-of-00051.safetensors", - "model.layers.13.mlp.gate.qweight": "model-00011-of-00051.safetensors", - "model.layers.13.mlp.gate.qzeros": "model-00011-of-00051.safetensors", - "model.layers.13.mlp.gate.scales": "model-00011-of-00051.safetensors", + "model.layers.13.mlp.gate.weight": "model-00011-of-00051.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00051.safetensors", "model.layers.13.self_attn.k_norm.weight": "model-00011-of-00051.safetensors", "model.layers.13.self_attn.k_proj.qweight": "model-00011-of-00051.safetensors", @@ -10201,9 +10189,7 @@ "model.layers.14.mlp.experts.99.up_proj.qweight": "model-00013-of-00051.safetensors", "model.layers.14.mlp.experts.99.up_proj.qzeros": "model-00013-of-00051.safetensors", "model.layers.14.mlp.experts.99.up_proj.scales": "model-00013-of-00051.safetensors", - "model.layers.14.mlp.gate.qweight": "model-00012-of-00051.safetensors", - "model.layers.14.mlp.gate.qzeros": "model-00012-of-00051.safetensors", - "model.layers.14.mlp.gate.scales": "model-00012-of-00051.safetensors", + "model.layers.14.mlp.gate.weight": "model-00012-of-00051.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", "model.layers.14.self_attn.k_norm.weight": "model-00012-of-00051.safetensors", "model.layers.14.self_attn.k_proj.qweight": "model-00012-of-00051.safetensors", @@ -11660,9 +11646,7 @@ "model.layers.15.mlp.experts.99.up_proj.qweight": "model-00013-of-00051.safetensors", "model.layers.15.mlp.experts.99.up_proj.qzeros": "model-00013-of-00051.safetensors", "model.layers.15.mlp.experts.99.up_proj.scales": "model-00013-of-00051.safetensors", - "model.layers.15.mlp.gate.qweight": "model-00013-of-00051.safetensors", - "model.layers.15.mlp.gate.qzeros": "model-00013-of-00051.safetensors", - "model.layers.15.mlp.gate.scales": "model-00013-of-00051.safetensors", + "model.layers.15.mlp.gate.weight": "model-00013-of-00051.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", "model.layers.15.self_attn.k_norm.weight": "model-00013-of-00051.safetensors", "model.layers.15.self_attn.k_proj.qweight": "model-00013-of-00051.safetensors", @@ -13119,9 +13103,7 @@ "model.layers.16.mlp.experts.99.up_proj.qweight": "model-00014-of-00051.safetensors", "model.layers.16.mlp.experts.99.up_proj.qzeros": "model-00014-of-00051.safetensors", "model.layers.16.mlp.experts.99.up_proj.scales": "model-00014-of-00051.safetensors", - "model.layers.16.mlp.gate.qweight": "model-00014-of-00051.safetensors", - "model.layers.16.mlp.gate.qzeros": "model-00014-of-00051.safetensors", - "model.layers.16.mlp.gate.scales": "model-00014-of-00051.safetensors", + "model.layers.16.mlp.gate.weight": "model-00014-of-00051.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", "model.layers.16.self_attn.k_norm.weight": "model-00014-of-00051.safetensors", "model.layers.16.self_attn.k_proj.qweight": "model-00014-of-00051.safetensors", @@ -14578,9 +14560,7 @@ "model.layers.17.mlp.experts.99.up_proj.qweight": "model-00015-of-00051.safetensors", "model.layers.17.mlp.experts.99.up_proj.qzeros": "model-00015-of-00051.safetensors", "model.layers.17.mlp.experts.99.up_proj.scales": "model-00015-of-00051.safetensors", - "model.layers.17.mlp.gate.qweight": "model-00015-of-00051.safetensors", - "model.layers.17.mlp.gate.qzeros": "model-00015-of-00051.safetensors", - "model.layers.17.mlp.gate.scales": "model-00015-of-00051.safetensors", + "model.layers.17.mlp.gate.weight": "model-00015-of-00051.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", "model.layers.17.self_attn.k_norm.weight": "model-00015-of-00051.safetensors", "model.layers.17.self_attn.k_proj.qweight": "model-00015-of-00051.safetensors", @@ -16037,9 +16017,7 @@ "model.layers.18.mlp.experts.99.up_proj.qweight": "model-00016-of-00051.safetensors", "model.layers.18.mlp.experts.99.up_proj.qzeros": "model-00016-of-00051.safetensors", "model.layers.18.mlp.experts.99.up_proj.scales": "model-00016-of-00051.safetensors", - "model.layers.18.mlp.gate.qweight": "model-00015-of-00051.safetensors", - "model.layers.18.mlp.gate.qzeros": "model-00015-of-00051.safetensors", - "model.layers.18.mlp.gate.scales": "model-00015-of-00051.safetensors", + "model.layers.18.mlp.gate.weight": "model-00015-of-00051.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00016-of-00051.safetensors", "model.layers.18.self_attn.k_norm.weight": "model-00015-of-00051.safetensors", "model.layers.18.self_attn.k_proj.qweight": "model-00015-of-00051.safetensors", @@ -17496,9 +17474,7 @@ "model.layers.19.mlp.experts.99.up_proj.qweight": "model-00017-of-00051.safetensors", "model.layers.19.mlp.experts.99.up_proj.qzeros": "model-00017-of-00051.safetensors", "model.layers.19.mlp.experts.99.up_proj.scales": "model-00017-of-00051.safetensors", - "model.layers.19.mlp.gate.qweight": "model-00016-of-00051.safetensors", - "model.layers.19.mlp.gate.qzeros": "model-00016-of-00051.safetensors", - "model.layers.19.mlp.gate.scales": "model-00016-of-00051.safetensors", + "model.layers.19.mlp.gate.weight": "model-00016-of-00051.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", "model.layers.19.self_attn.k_norm.weight": "model-00016-of-00051.safetensors", "model.layers.19.self_attn.k_proj.qweight": "model-00016-of-00051.safetensors", @@ -18955,9 +18931,7 @@ "model.layers.2.mlp.experts.99.up_proj.qweight": "model-00003-of-00051.safetensors", "model.layers.2.mlp.experts.99.up_proj.qzeros": "model-00003-of-00051.safetensors", "model.layers.2.mlp.experts.99.up_proj.scales": "model-00003-of-00051.safetensors", - "model.layers.2.mlp.gate.qweight": "model-00002-of-00051.safetensors", - "model.layers.2.mlp.gate.qzeros": "model-00002-of-00051.safetensors", - "model.layers.2.mlp.gate.scales": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.gate.weight": "model-00002-of-00051.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", "model.layers.2.self_attn.k_norm.weight": "model-00002-of-00051.safetensors", "model.layers.2.self_attn.k_proj.qweight": "model-00002-of-00051.safetensors", @@ -20414,9 +20388,7 @@ "model.layers.20.mlp.experts.99.up_proj.qweight": "model-00017-of-00051.safetensors", "model.layers.20.mlp.experts.99.up_proj.qzeros": "model-00017-of-00051.safetensors", "model.layers.20.mlp.experts.99.up_proj.scales": "model-00017-of-00051.safetensors", - "model.layers.20.mlp.gate.qweight": "model-00017-of-00051.safetensors", - "model.layers.20.mlp.gate.qzeros": "model-00017-of-00051.safetensors", - "model.layers.20.mlp.gate.scales": "model-00017-of-00051.safetensors", + "model.layers.20.mlp.gate.weight": "model-00017-of-00051.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", "model.layers.20.self_attn.k_norm.weight": "model-00017-of-00051.safetensors", "model.layers.20.self_attn.k_proj.qweight": "model-00017-of-00051.safetensors", @@ -21873,9 +21845,7 @@ "model.layers.21.mlp.experts.99.up_proj.qweight": "model-00018-of-00051.safetensors", "model.layers.21.mlp.experts.99.up_proj.qzeros": "model-00018-of-00051.safetensors", "model.layers.21.mlp.experts.99.up_proj.scales": "model-00018-of-00051.safetensors", - "model.layers.21.mlp.gate.qweight": "model-00018-of-00051.safetensors", - "model.layers.21.mlp.gate.qzeros": "model-00018-of-00051.safetensors", - "model.layers.21.mlp.gate.scales": "model-00018-of-00051.safetensors", + "model.layers.21.mlp.gate.weight": "model-00018-of-00051.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", "model.layers.21.self_attn.k_norm.weight": "model-00018-of-00051.safetensors", "model.layers.21.self_attn.k_proj.qweight": "model-00018-of-00051.safetensors", @@ -23332,9 +23302,7 @@ "model.layers.22.mlp.experts.99.up_proj.qweight": "model-00019-of-00051.safetensors", "model.layers.22.mlp.experts.99.up_proj.qzeros": "model-00019-of-00051.safetensors", "model.layers.22.mlp.experts.99.up_proj.scales": "model-00019-of-00051.safetensors", - "model.layers.22.mlp.gate.qweight": "model-00019-of-00051.safetensors", - "model.layers.22.mlp.gate.qzeros": "model-00019-of-00051.safetensors", - "model.layers.22.mlp.gate.scales": "model-00019-of-00051.safetensors", + "model.layers.22.mlp.gate.weight": "model-00019-of-00051.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", "model.layers.22.self_attn.k_norm.weight": "model-00019-of-00051.safetensors", "model.layers.22.self_attn.k_proj.qweight": "model-00019-of-00051.safetensors", @@ -24791,9 +24759,7 @@ "model.layers.23.mlp.experts.99.up_proj.qweight": "model-00020-of-00051.safetensors", "model.layers.23.mlp.experts.99.up_proj.qzeros": "model-00020-of-00051.safetensors", "model.layers.23.mlp.experts.99.up_proj.scales": "model-00020-of-00051.safetensors", - "model.layers.23.mlp.gate.qweight": "model-00019-of-00051.safetensors", - "model.layers.23.mlp.gate.qzeros": "model-00019-of-00051.safetensors", - "model.layers.23.mlp.gate.scales": "model-00019-of-00051.safetensors", + "model.layers.23.mlp.gate.weight": "model-00019-of-00051.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00020-of-00051.safetensors", "model.layers.23.self_attn.k_norm.weight": "model-00019-of-00051.safetensors", "model.layers.23.self_attn.k_proj.qweight": "model-00019-of-00051.safetensors", @@ -26250,9 +26216,7 @@ "model.layers.24.mlp.experts.99.up_proj.qweight": "model-00021-of-00051.safetensors", "model.layers.24.mlp.experts.99.up_proj.qzeros": "model-00021-of-00051.safetensors", "model.layers.24.mlp.experts.99.up_proj.scales": "model-00021-of-00051.safetensors", - "model.layers.24.mlp.gate.qweight": "model-00020-of-00051.safetensors", - "model.layers.24.mlp.gate.qzeros": "model-00020-of-00051.safetensors", - "model.layers.24.mlp.gate.scales": "model-00020-of-00051.safetensors", + "model.layers.24.mlp.gate.weight": "model-00020-of-00051.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", "model.layers.24.self_attn.k_norm.weight": "model-00020-of-00051.safetensors", "model.layers.24.self_attn.k_proj.qweight": "model-00020-of-00051.safetensors", @@ -27709,9 +27673,7 @@ "model.layers.25.mlp.experts.99.up_proj.qweight": "model-00021-of-00051.safetensors", "model.layers.25.mlp.experts.99.up_proj.qzeros": "model-00021-of-00051.safetensors", "model.layers.25.mlp.experts.99.up_proj.scales": "model-00021-of-00051.safetensors", - "model.layers.25.mlp.gate.qweight": "model-00021-of-00051.safetensors", - "model.layers.25.mlp.gate.qzeros": "model-00021-of-00051.safetensors", - "model.layers.25.mlp.gate.scales": "model-00021-of-00051.safetensors", + "model.layers.25.mlp.gate.weight": "model-00021-of-00051.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", "model.layers.25.self_attn.k_norm.weight": "model-00021-of-00051.safetensors", "model.layers.25.self_attn.k_proj.qweight": "model-00021-of-00051.safetensors", @@ -29168,9 +29130,7 @@ "model.layers.26.mlp.experts.99.up_proj.qweight": "model-00022-of-00051.safetensors", "model.layers.26.mlp.experts.99.up_proj.qzeros": "model-00022-of-00051.safetensors", "model.layers.26.mlp.experts.99.up_proj.scales": "model-00022-of-00051.safetensors", - "model.layers.26.mlp.gate.qweight": "model-00022-of-00051.safetensors", - "model.layers.26.mlp.gate.qzeros": "model-00022-of-00051.safetensors", - "model.layers.26.mlp.gate.scales": "model-00022-of-00051.safetensors", + "model.layers.26.mlp.gate.weight": "model-00022-of-00051.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", "model.layers.26.self_attn.k_norm.weight": "model-00022-of-00051.safetensors", "model.layers.26.self_attn.k_proj.qweight": "model-00022-of-00051.safetensors", @@ -30627,9 +30587,7 @@ "model.layers.27.mlp.experts.99.up_proj.qweight": "model-00023-of-00051.safetensors", "model.layers.27.mlp.experts.99.up_proj.qzeros": "model-00023-of-00051.safetensors", "model.layers.27.mlp.experts.99.up_proj.scales": "model-00023-of-00051.safetensors", - "model.layers.27.mlp.gate.qweight": "model-00023-of-00051.safetensors", - "model.layers.27.mlp.gate.qzeros": "model-00023-of-00051.safetensors", - "model.layers.27.mlp.gate.scales": "model-00023-of-00051.safetensors", + "model.layers.27.mlp.gate.weight": "model-00023-of-00051.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", "model.layers.27.self_attn.k_norm.weight": "model-00023-of-00051.safetensors", "model.layers.27.self_attn.k_proj.qweight": "model-00023-of-00051.safetensors", @@ -32086,9 +32044,7 @@ "model.layers.28.mlp.experts.99.up_proj.qweight": "model-00024-of-00051.safetensors", "model.layers.28.mlp.experts.99.up_proj.qzeros": "model-00024-of-00051.safetensors", "model.layers.28.mlp.experts.99.up_proj.scales": "model-00024-of-00051.safetensors", - "model.layers.28.mlp.gate.qweight": "model-00023-of-00051.safetensors", - "model.layers.28.mlp.gate.qzeros": "model-00023-of-00051.safetensors", - "model.layers.28.mlp.gate.scales": "model-00023-of-00051.safetensors", + "model.layers.28.mlp.gate.weight": "model-00023-of-00051.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00024-of-00051.safetensors", "model.layers.28.self_attn.k_norm.weight": "model-00023-of-00051.safetensors", "model.layers.28.self_attn.k_proj.qweight": "model-00023-of-00051.safetensors", @@ -33545,9 +33501,7 @@ "model.layers.29.mlp.experts.99.up_proj.qweight": "model-00025-of-00051.safetensors", "model.layers.29.mlp.experts.99.up_proj.qzeros": "model-00025-of-00051.safetensors", "model.layers.29.mlp.experts.99.up_proj.scales": "model-00025-of-00051.safetensors", - "model.layers.29.mlp.gate.qweight": "model-00024-of-00051.safetensors", - "model.layers.29.mlp.gate.qzeros": "model-00024-of-00051.safetensors", - "model.layers.29.mlp.gate.scales": "model-00024-of-00051.safetensors", + "model.layers.29.mlp.gate.weight": "model-00024-of-00051.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", "model.layers.29.self_attn.k_norm.weight": "model-00024-of-00051.safetensors", "model.layers.29.self_attn.k_proj.qweight": "model-00024-of-00051.safetensors", @@ -35004,9 +34958,7 @@ "model.layers.3.mlp.experts.99.up_proj.qweight": "model-00004-of-00051.safetensors", "model.layers.3.mlp.experts.99.up_proj.qzeros": "model-00004-of-00051.safetensors", "model.layers.3.mlp.experts.99.up_proj.scales": "model-00004-of-00051.safetensors", - "model.layers.3.mlp.gate.qweight": "model-00003-of-00051.safetensors", - "model.layers.3.mlp.gate.qzeros": "model-00003-of-00051.safetensors", - "model.layers.3.mlp.gate.scales": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.gate.weight": "model-00003-of-00051.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00051.safetensors", "model.layers.3.self_attn.k_norm.weight": "model-00003-of-00051.safetensors", "model.layers.3.self_attn.k_proj.qweight": "model-00003-of-00051.safetensors", @@ -36463,9 +36415,7 @@ "model.layers.30.mlp.experts.99.up_proj.qweight": "model-00025-of-00051.safetensors", "model.layers.30.mlp.experts.99.up_proj.qzeros": "model-00025-of-00051.safetensors", "model.layers.30.mlp.experts.99.up_proj.scales": "model-00025-of-00051.safetensors", - "model.layers.30.mlp.gate.qweight": "model-00025-of-00051.safetensors", - "model.layers.30.mlp.gate.qzeros": "model-00025-of-00051.safetensors", - "model.layers.30.mlp.gate.scales": "model-00025-of-00051.safetensors", + "model.layers.30.mlp.gate.weight": "model-00025-of-00051.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", "model.layers.30.self_attn.k_norm.weight": "model-00025-of-00051.safetensors", "model.layers.30.self_attn.k_proj.qweight": "model-00025-of-00051.safetensors", @@ -37922,9 +37872,7 @@ "model.layers.31.mlp.experts.99.up_proj.qweight": "model-00026-of-00051.safetensors", "model.layers.31.mlp.experts.99.up_proj.qzeros": "model-00026-of-00051.safetensors", "model.layers.31.mlp.experts.99.up_proj.scales": "model-00026-of-00051.safetensors", - "model.layers.31.mlp.gate.qweight": "model-00026-of-00051.safetensors", - "model.layers.31.mlp.gate.qzeros": "model-00026-of-00051.safetensors", - "model.layers.31.mlp.gate.scales": "model-00026-of-00051.safetensors", + "model.layers.31.mlp.gate.weight": "model-00026-of-00051.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", "model.layers.31.self_attn.k_norm.weight": "model-00026-of-00051.safetensors", "model.layers.31.self_attn.k_proj.qweight": "model-00026-of-00051.safetensors", @@ -39381,9 +39329,7 @@ "model.layers.32.mlp.experts.99.up_proj.qweight": "model-00027-of-00051.safetensors", "model.layers.32.mlp.experts.99.up_proj.qzeros": "model-00027-of-00051.safetensors", "model.layers.32.mlp.experts.99.up_proj.scales": "model-00027-of-00051.safetensors", - "model.layers.32.mlp.gate.qweight": "model-00027-of-00051.safetensors", - "model.layers.32.mlp.gate.qzeros": "model-00027-of-00051.safetensors", - "model.layers.32.mlp.gate.scales": "model-00027-of-00051.safetensors", + "model.layers.32.mlp.gate.weight": "model-00027-of-00051.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", "model.layers.32.self_attn.k_norm.weight": "model-00027-of-00051.safetensors", "model.layers.32.self_attn.k_proj.qweight": "model-00027-of-00051.safetensors", @@ -40840,9 +40786,7 @@ "model.layers.33.mlp.experts.99.up_proj.qweight": "model-00028-of-00051.safetensors", "model.layers.33.mlp.experts.99.up_proj.qzeros": "model-00028-of-00051.safetensors", "model.layers.33.mlp.experts.99.up_proj.scales": "model-00028-of-00051.safetensors", - "model.layers.33.mlp.gate.qweight": "model-00027-of-00051.safetensors", - "model.layers.33.mlp.gate.qzeros": "model-00027-of-00051.safetensors", - "model.layers.33.mlp.gate.scales": "model-00027-of-00051.safetensors", + "model.layers.33.mlp.gate.weight": "model-00027-of-00051.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00028-of-00051.safetensors", "model.layers.33.self_attn.k_norm.weight": "model-00027-of-00051.safetensors", "model.layers.33.self_attn.k_proj.qweight": "model-00027-of-00051.safetensors", @@ -42299,9 +42243,7 @@ "model.layers.34.mlp.experts.99.up_proj.qweight": "model-00029-of-00051.safetensors", "model.layers.34.mlp.experts.99.up_proj.qzeros": "model-00029-of-00051.safetensors", "model.layers.34.mlp.experts.99.up_proj.scales": "model-00029-of-00051.safetensors", - "model.layers.34.mlp.gate.qweight": "model-00028-of-00051.safetensors", - "model.layers.34.mlp.gate.qzeros": "model-00028-of-00051.safetensors", - "model.layers.34.mlp.gate.scales": "model-00028-of-00051.safetensors", + "model.layers.34.mlp.gate.weight": "model-00028-of-00051.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", "model.layers.34.self_attn.k_norm.weight": "model-00028-of-00051.safetensors", "model.layers.34.self_attn.k_proj.qweight": "model-00028-of-00051.safetensors", @@ -43758,9 +43700,7 @@ "model.layers.35.mlp.experts.99.up_proj.qweight": "model-00029-of-00051.safetensors", "model.layers.35.mlp.experts.99.up_proj.qzeros": "model-00029-of-00051.safetensors", "model.layers.35.mlp.experts.99.up_proj.scales": "model-00029-of-00051.safetensors", - "model.layers.35.mlp.gate.qweight": "model-00029-of-00051.safetensors", - "model.layers.35.mlp.gate.qzeros": "model-00029-of-00051.safetensors", - "model.layers.35.mlp.gate.scales": "model-00029-of-00051.safetensors", + "model.layers.35.mlp.gate.weight": "model-00029-of-00051.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", "model.layers.35.self_attn.k_norm.weight": "model-00029-of-00051.safetensors", "model.layers.35.self_attn.k_proj.qweight": "model-00029-of-00051.safetensors", @@ -45217,9 +45157,7 @@ "model.layers.36.mlp.experts.99.up_proj.qweight": "model-00030-of-00051.safetensors", "model.layers.36.mlp.experts.99.up_proj.qzeros": "model-00030-of-00051.safetensors", "model.layers.36.mlp.experts.99.up_proj.scales": "model-00030-of-00051.safetensors", - "model.layers.36.mlp.gate.qweight": "model-00030-of-00051.safetensors", - "model.layers.36.mlp.gate.qzeros": "model-00030-of-00051.safetensors", - "model.layers.36.mlp.gate.scales": "model-00030-of-00051.safetensors", + "model.layers.36.mlp.gate.weight": "model-00030-of-00051.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", "model.layers.36.self_attn.k_norm.weight": "model-00030-of-00051.safetensors", "model.layers.36.self_attn.k_proj.qweight": "model-00030-of-00051.safetensors", @@ -46676,9 +46614,7 @@ "model.layers.37.mlp.experts.99.up_proj.qweight": "model-00031-of-00051.safetensors", "model.layers.37.mlp.experts.99.up_proj.qzeros": "model-00031-of-00051.safetensors", "model.layers.37.mlp.experts.99.up_proj.scales": "model-00031-of-00051.safetensors", - "model.layers.37.mlp.gate.qweight": "model-00031-of-00051.safetensors", - "model.layers.37.mlp.gate.qzeros": "model-00031-of-00051.safetensors", - "model.layers.37.mlp.gate.scales": "model-00031-of-00051.safetensors", + "model.layers.37.mlp.gate.weight": "model-00031-of-00051.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", "model.layers.37.self_attn.k_norm.weight": "model-00031-of-00051.safetensors", "model.layers.37.self_attn.k_proj.qweight": "model-00031-of-00051.safetensors", @@ -48135,9 +48071,7 @@ "model.layers.38.mlp.experts.99.up_proj.qweight": "model-00032-of-00051.safetensors", "model.layers.38.mlp.experts.99.up_proj.qzeros": "model-00032-of-00051.safetensors", "model.layers.38.mlp.experts.99.up_proj.scales": "model-00032-of-00051.safetensors", - "model.layers.38.mlp.gate.qweight": "model-00031-of-00051.safetensors", - "model.layers.38.mlp.gate.qzeros": "model-00031-of-00051.safetensors", - "model.layers.38.mlp.gate.scales": "model-00031-of-00051.safetensors", + "model.layers.38.mlp.gate.weight": "model-00031-of-00051.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00032-of-00051.safetensors", "model.layers.38.self_attn.k_norm.weight": "model-00031-of-00051.safetensors", "model.layers.38.self_attn.k_proj.qweight": "model-00031-of-00051.safetensors", @@ -49594,9 +49528,7 @@ "model.layers.39.mlp.experts.99.up_proj.qweight": "model-00033-of-00051.safetensors", "model.layers.39.mlp.experts.99.up_proj.qzeros": "model-00033-of-00051.safetensors", "model.layers.39.mlp.experts.99.up_proj.scales": "model-00033-of-00051.safetensors", - "model.layers.39.mlp.gate.qweight": "model-00032-of-00051.safetensors", - "model.layers.39.mlp.gate.qzeros": "model-00032-of-00051.safetensors", - "model.layers.39.mlp.gate.scales": "model-00032-of-00051.safetensors", + "model.layers.39.mlp.gate.weight": "model-00032-of-00051.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", "model.layers.39.self_attn.k_norm.weight": "model-00032-of-00051.safetensors", "model.layers.39.self_attn.k_proj.qweight": "model-00032-of-00051.safetensors", @@ -51053,9 +50985,7 @@ "model.layers.4.mlp.experts.99.up_proj.qweight": "model-00005-of-00051.safetensors", "model.layers.4.mlp.experts.99.up_proj.qzeros": "model-00005-of-00051.safetensors", "model.layers.4.mlp.experts.99.up_proj.scales": "model-00005-of-00051.safetensors", - "model.layers.4.mlp.gate.qweight": "model-00004-of-00051.safetensors", - "model.layers.4.mlp.gate.qzeros": "model-00004-of-00051.safetensors", - "model.layers.4.mlp.gate.scales": "model-00004-of-00051.safetensors", + "model.layers.4.mlp.gate.weight": "model-00004-of-00051.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", "model.layers.4.self_attn.k_norm.weight": "model-00004-of-00051.safetensors", "model.layers.4.self_attn.k_proj.qweight": "model-00004-of-00051.safetensors", @@ -52512,9 +52442,7 @@ "model.layers.40.mlp.experts.99.up_proj.qweight": "model-00033-of-00051.safetensors", "model.layers.40.mlp.experts.99.up_proj.qzeros": "model-00033-of-00051.safetensors", "model.layers.40.mlp.experts.99.up_proj.scales": "model-00033-of-00051.safetensors", - "model.layers.40.mlp.gate.qweight": "model-00033-of-00051.safetensors", - "model.layers.40.mlp.gate.qzeros": "model-00033-of-00051.safetensors", - "model.layers.40.mlp.gate.scales": "model-00033-of-00051.safetensors", + "model.layers.40.mlp.gate.weight": "model-00033-of-00051.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", "model.layers.40.self_attn.k_norm.weight": "model-00033-of-00051.safetensors", "model.layers.40.self_attn.k_proj.qweight": "model-00033-of-00051.safetensors", @@ -53971,9 +53899,7 @@ "model.layers.41.mlp.experts.99.up_proj.qweight": "model-00034-of-00051.safetensors", "model.layers.41.mlp.experts.99.up_proj.qzeros": "model-00034-of-00051.safetensors", "model.layers.41.mlp.experts.99.up_proj.scales": "model-00034-of-00051.safetensors", - "model.layers.41.mlp.gate.qweight": "model-00034-of-00051.safetensors", - "model.layers.41.mlp.gate.qzeros": "model-00034-of-00051.safetensors", - "model.layers.41.mlp.gate.scales": "model-00034-of-00051.safetensors", + "model.layers.41.mlp.gate.weight": "model-00034-of-00051.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", "model.layers.41.self_attn.k_norm.weight": "model-00034-of-00051.safetensors", "model.layers.41.self_attn.k_proj.qweight": "model-00034-of-00051.safetensors", @@ -55430,9 +55356,7 @@ "model.layers.42.mlp.experts.99.up_proj.qweight": "model-00035-of-00051.safetensors", "model.layers.42.mlp.experts.99.up_proj.qzeros": "model-00035-of-00051.safetensors", "model.layers.42.mlp.experts.99.up_proj.scales": "model-00035-of-00051.safetensors", - "model.layers.42.mlp.gate.qweight": "model-00035-of-00051.safetensors", - "model.layers.42.mlp.gate.qzeros": "model-00035-of-00051.safetensors", - "model.layers.42.mlp.gate.scales": "model-00035-of-00051.safetensors", + "model.layers.42.mlp.gate.weight": "model-00035-of-00051.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", "model.layers.42.self_attn.k_norm.weight": "model-00035-of-00051.safetensors", "model.layers.42.self_attn.k_proj.qweight": "model-00035-of-00051.safetensors", @@ -56889,9 +56813,7 @@ "model.layers.43.mlp.experts.99.up_proj.qweight": "model-00036-of-00051.safetensors", "model.layers.43.mlp.experts.99.up_proj.qzeros": "model-00036-of-00051.safetensors", "model.layers.43.mlp.experts.99.up_proj.scales": "model-00036-of-00051.safetensors", - "model.layers.43.mlp.gate.qweight": "model-00035-of-00051.safetensors", - "model.layers.43.mlp.gate.qzeros": "model-00035-of-00051.safetensors", - "model.layers.43.mlp.gate.scales": "model-00035-of-00051.safetensors", + "model.layers.43.mlp.gate.weight": "model-00035-of-00051.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00036-of-00051.safetensors", "model.layers.43.self_attn.k_norm.weight": "model-00035-of-00051.safetensors", "model.layers.43.self_attn.k_proj.qweight": "model-00035-of-00051.safetensors", @@ -58348,9 +58270,7 @@ "model.layers.44.mlp.experts.99.up_proj.qweight": "model-00037-of-00051.safetensors", "model.layers.44.mlp.experts.99.up_proj.qzeros": "model-00037-of-00051.safetensors", "model.layers.44.mlp.experts.99.up_proj.scales": "model-00037-of-00051.safetensors", - "model.layers.44.mlp.gate.qweight": "model-00036-of-00051.safetensors", - "model.layers.44.mlp.gate.qzeros": "model-00036-of-00051.safetensors", - "model.layers.44.mlp.gate.scales": "model-00036-of-00051.safetensors", + "model.layers.44.mlp.gate.weight": "model-00036-of-00051.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", "model.layers.44.self_attn.k_norm.weight": "model-00036-of-00051.safetensors", "model.layers.44.self_attn.k_proj.qweight": "model-00036-of-00051.safetensors", @@ -59807,9 +59727,7 @@ "model.layers.45.mlp.experts.99.up_proj.qweight": "model-00037-of-00051.safetensors", "model.layers.45.mlp.experts.99.up_proj.qzeros": "model-00037-of-00051.safetensors", "model.layers.45.mlp.experts.99.up_proj.scales": "model-00037-of-00051.safetensors", - "model.layers.45.mlp.gate.qweight": "model-00037-of-00051.safetensors", - "model.layers.45.mlp.gate.qzeros": "model-00037-of-00051.safetensors", - "model.layers.45.mlp.gate.scales": "model-00037-of-00051.safetensors", + "model.layers.45.mlp.gate.weight": "model-00037-of-00051.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", "model.layers.45.self_attn.k_norm.weight": "model-00037-of-00051.safetensors", "model.layers.45.self_attn.k_proj.qweight": "model-00037-of-00051.safetensors", @@ -61266,9 +61184,7 @@ "model.layers.46.mlp.experts.99.up_proj.qweight": "model-00038-of-00051.safetensors", "model.layers.46.mlp.experts.99.up_proj.qzeros": "model-00038-of-00051.safetensors", "model.layers.46.mlp.experts.99.up_proj.scales": "model-00038-of-00051.safetensors", - "model.layers.46.mlp.gate.qweight": "model-00038-of-00051.safetensors", - "model.layers.46.mlp.gate.qzeros": "model-00038-of-00051.safetensors", - "model.layers.46.mlp.gate.scales": "model-00038-of-00051.safetensors", + "model.layers.46.mlp.gate.weight": "model-00038-of-00051.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", "model.layers.46.self_attn.k_norm.weight": "model-00038-of-00051.safetensors", "model.layers.46.self_attn.k_proj.qweight": "model-00038-of-00051.safetensors", @@ -62725,9 +62641,7 @@ "model.layers.47.mlp.experts.99.up_proj.qweight": "model-00039-of-00051.safetensors", "model.layers.47.mlp.experts.99.up_proj.qzeros": "model-00039-of-00051.safetensors", "model.layers.47.mlp.experts.99.up_proj.scales": "model-00039-of-00051.safetensors", - "model.layers.47.mlp.gate.qweight": "model-00039-of-00051.safetensors", - "model.layers.47.mlp.gate.qzeros": "model-00039-of-00051.safetensors", - "model.layers.47.mlp.gate.scales": "model-00039-of-00051.safetensors", + "model.layers.47.mlp.gate.weight": "model-00039-of-00051.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", "model.layers.47.self_attn.k_norm.weight": "model-00039-of-00051.safetensors", "model.layers.47.self_attn.k_proj.qweight": "model-00039-of-00051.safetensors", @@ -64184,9 +64098,7 @@ "model.layers.48.mlp.experts.99.up_proj.qweight": "model-00040-of-00051.safetensors", "model.layers.48.mlp.experts.99.up_proj.qzeros": "model-00040-of-00051.safetensors", "model.layers.48.mlp.experts.99.up_proj.scales": "model-00040-of-00051.safetensors", - "model.layers.48.mlp.gate.qweight": "model-00039-of-00051.safetensors", - "model.layers.48.mlp.gate.qzeros": "model-00039-of-00051.safetensors", - "model.layers.48.mlp.gate.scales": "model-00039-of-00051.safetensors", + "model.layers.48.mlp.gate.weight": "model-00039-of-00051.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00040-of-00051.safetensors", "model.layers.48.self_attn.k_norm.weight": "model-00039-of-00051.safetensors", "model.layers.48.self_attn.k_proj.qweight": "model-00039-of-00051.safetensors", @@ -65643,9 +65555,7 @@ "model.layers.49.mlp.experts.99.up_proj.qweight": "model-00041-of-00051.safetensors", "model.layers.49.mlp.experts.99.up_proj.qzeros": "model-00041-of-00051.safetensors", "model.layers.49.mlp.experts.99.up_proj.scales": "model-00041-of-00051.safetensors", - "model.layers.49.mlp.gate.qweight": "model-00040-of-00051.safetensors", - "model.layers.49.mlp.gate.qzeros": "model-00040-of-00051.safetensors", - "model.layers.49.mlp.gate.scales": "model-00040-of-00051.safetensors", + "model.layers.49.mlp.gate.weight": "model-00040-of-00051.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", "model.layers.49.self_attn.k_norm.weight": "model-00040-of-00051.safetensors", "model.layers.49.self_attn.k_proj.qweight": "model-00040-of-00051.safetensors", @@ -67102,9 +67012,7 @@ "model.layers.5.mlp.experts.99.up_proj.qweight": "model-00005-of-00051.safetensors", "model.layers.5.mlp.experts.99.up_proj.qzeros": "model-00005-of-00051.safetensors", "model.layers.5.mlp.experts.99.up_proj.scales": "model-00005-of-00051.safetensors", - "model.layers.5.mlp.gate.qweight": "model-00005-of-00051.safetensors", - "model.layers.5.mlp.gate.qzeros": "model-00005-of-00051.safetensors", - "model.layers.5.mlp.gate.scales": "model-00005-of-00051.safetensors", + "model.layers.5.mlp.gate.weight": "model-00005-of-00051.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", "model.layers.5.self_attn.k_norm.weight": "model-00005-of-00051.safetensors", "model.layers.5.self_attn.k_proj.qweight": "model-00005-of-00051.safetensors", @@ -68561,9 +68469,7 @@ "model.layers.50.mlp.experts.99.up_proj.qweight": "model-00041-of-00051.safetensors", "model.layers.50.mlp.experts.99.up_proj.qzeros": "model-00041-of-00051.safetensors", "model.layers.50.mlp.experts.99.up_proj.scales": "model-00041-of-00051.safetensors", - "model.layers.50.mlp.gate.qweight": "model-00041-of-00051.safetensors", - "model.layers.50.mlp.gate.qzeros": "model-00041-of-00051.safetensors", - "model.layers.50.mlp.gate.scales": "model-00041-of-00051.safetensors", + "model.layers.50.mlp.gate.weight": "model-00041-of-00051.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", "model.layers.50.self_attn.k_norm.weight": "model-00041-of-00051.safetensors", "model.layers.50.self_attn.k_proj.qweight": "model-00041-of-00051.safetensors", @@ -70020,9 +69926,7 @@ "model.layers.51.mlp.experts.99.up_proj.qweight": "model-00042-of-00051.safetensors", "model.layers.51.mlp.experts.99.up_proj.qzeros": "model-00042-of-00051.safetensors", "model.layers.51.mlp.experts.99.up_proj.scales": "model-00042-of-00051.safetensors", - "model.layers.51.mlp.gate.qweight": "model-00042-of-00051.safetensors", - "model.layers.51.mlp.gate.qzeros": "model-00042-of-00051.safetensors", - "model.layers.51.mlp.gate.scales": "model-00042-of-00051.safetensors", + "model.layers.51.mlp.gate.weight": "model-00042-of-00051.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", "model.layers.51.self_attn.k_norm.weight": "model-00042-of-00051.safetensors", "model.layers.51.self_attn.k_proj.qweight": "model-00042-of-00051.safetensors", @@ -71479,9 +71383,7 @@ "model.layers.52.mlp.experts.99.up_proj.qweight": "model-00043-of-00051.safetensors", "model.layers.52.mlp.experts.99.up_proj.qzeros": "model-00043-of-00051.safetensors", "model.layers.52.mlp.experts.99.up_proj.scales": "model-00043-of-00051.safetensors", - "model.layers.52.mlp.gate.qweight": "model-00043-of-00051.safetensors", - "model.layers.52.mlp.gate.qzeros": "model-00043-of-00051.safetensors", - "model.layers.52.mlp.gate.scales": "model-00043-of-00051.safetensors", + "model.layers.52.mlp.gate.weight": "model-00043-of-00051.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", "model.layers.52.self_attn.k_norm.weight": "model-00043-of-00051.safetensors", "model.layers.52.self_attn.k_proj.qweight": "model-00043-of-00051.safetensors", @@ -72938,9 +72840,7 @@ "model.layers.53.mlp.experts.99.up_proj.qweight": "model-00044-of-00051.safetensors", "model.layers.53.mlp.experts.99.up_proj.qzeros": "model-00044-of-00051.safetensors", "model.layers.53.mlp.experts.99.up_proj.scales": "model-00044-of-00051.safetensors", - "model.layers.53.mlp.gate.qweight": "model-00043-of-00051.safetensors", - "model.layers.53.mlp.gate.qzeros": "model-00043-of-00051.safetensors", - "model.layers.53.mlp.gate.scales": "model-00043-of-00051.safetensors", + "model.layers.53.mlp.gate.weight": "model-00043-of-00051.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00044-of-00051.safetensors", "model.layers.53.self_attn.k_norm.weight": "model-00043-of-00051.safetensors", "model.layers.53.self_attn.k_proj.qweight": "model-00043-of-00051.safetensors", @@ -74397,9 +74297,7 @@ "model.layers.54.mlp.experts.99.up_proj.qweight": "model-00045-of-00051.safetensors", "model.layers.54.mlp.experts.99.up_proj.qzeros": "model-00045-of-00051.safetensors", "model.layers.54.mlp.experts.99.up_proj.scales": "model-00045-of-00051.safetensors", - "model.layers.54.mlp.gate.qweight": "model-00044-of-00051.safetensors", - "model.layers.54.mlp.gate.qzeros": "model-00044-of-00051.safetensors", - "model.layers.54.mlp.gate.scales": "model-00044-of-00051.safetensors", + "model.layers.54.mlp.gate.weight": "model-00044-of-00051.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", "model.layers.54.self_attn.k_norm.weight": "model-00044-of-00051.safetensors", "model.layers.54.self_attn.k_proj.qweight": "model-00044-of-00051.safetensors", @@ -75856,9 +75754,7 @@ "model.layers.55.mlp.experts.99.up_proj.qweight": "model-00046-of-00051.safetensors", "model.layers.55.mlp.experts.99.up_proj.qzeros": "model-00046-of-00051.safetensors", "model.layers.55.mlp.experts.99.up_proj.scales": "model-00046-of-00051.safetensors", - "model.layers.55.mlp.gate.qweight": "model-00045-of-00051.safetensors", - "model.layers.55.mlp.gate.qzeros": "model-00045-of-00051.safetensors", - "model.layers.55.mlp.gate.scales": "model-00045-of-00051.safetensors", + "model.layers.55.mlp.gate.weight": "model-00045-of-00051.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", "model.layers.55.self_attn.k_norm.weight": "model-00045-of-00051.safetensors", "model.layers.55.self_attn.k_proj.qweight": "model-00045-of-00051.safetensors", @@ -77315,9 +77211,7 @@ "model.layers.56.mlp.experts.99.up_proj.qweight": "model-00046-of-00051.safetensors", "model.layers.56.mlp.experts.99.up_proj.qzeros": "model-00046-of-00051.safetensors", "model.layers.56.mlp.experts.99.up_proj.scales": "model-00046-of-00051.safetensors", - "model.layers.56.mlp.gate.qweight": "model-00046-of-00051.safetensors", - "model.layers.56.mlp.gate.qzeros": "model-00046-of-00051.safetensors", - "model.layers.56.mlp.gate.scales": "model-00046-of-00051.safetensors", + "model.layers.56.mlp.gate.weight": "model-00046-of-00051.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", "model.layers.56.self_attn.k_norm.weight": "model-00046-of-00051.safetensors", "model.layers.56.self_attn.k_proj.qweight": "model-00046-of-00051.safetensors", @@ -78774,9 +78668,7 @@ "model.layers.57.mlp.experts.99.up_proj.qweight": "model-00047-of-00051.safetensors", "model.layers.57.mlp.experts.99.up_proj.qzeros": "model-00047-of-00051.safetensors", "model.layers.57.mlp.experts.99.up_proj.scales": "model-00047-of-00051.safetensors", - "model.layers.57.mlp.gate.qweight": "model-00047-of-00051.safetensors", - "model.layers.57.mlp.gate.qzeros": "model-00047-of-00051.safetensors", - "model.layers.57.mlp.gate.scales": "model-00047-of-00051.safetensors", + "model.layers.57.mlp.gate.weight": "model-00047-of-00051.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", "model.layers.57.self_attn.k_norm.weight": "model-00047-of-00051.safetensors", "model.layers.57.self_attn.k_proj.qweight": "model-00047-of-00051.safetensors", @@ -80233,9 +80125,7 @@ "model.layers.58.mlp.experts.99.up_proj.qweight": "model-00048-of-00051.safetensors", "model.layers.58.mlp.experts.99.up_proj.qzeros": "model-00048-of-00051.safetensors", "model.layers.58.mlp.experts.99.up_proj.scales": "model-00048-of-00051.safetensors", - "model.layers.58.mlp.gate.qweight": "model-00047-of-00051.safetensors", - "model.layers.58.mlp.gate.qzeros": "model-00047-of-00051.safetensors", - "model.layers.58.mlp.gate.scales": "model-00047-of-00051.safetensors", + "model.layers.58.mlp.gate.weight": "model-00047-of-00051.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00048-of-00051.safetensors", "model.layers.58.self_attn.k_norm.weight": "model-00047-of-00051.safetensors", "model.layers.58.self_attn.k_proj.qweight": "model-00047-of-00051.safetensors", @@ -81692,9 +81582,7 @@ "model.layers.59.mlp.experts.99.up_proj.qweight": "model-00049-of-00051.safetensors", "model.layers.59.mlp.experts.99.up_proj.qzeros": "model-00049-of-00051.safetensors", "model.layers.59.mlp.experts.99.up_proj.scales": "model-00049-of-00051.safetensors", - "model.layers.59.mlp.gate.qweight": "model-00048-of-00051.safetensors", - "model.layers.59.mlp.gate.qzeros": "model-00048-of-00051.safetensors", - "model.layers.59.mlp.gate.scales": "model-00048-of-00051.safetensors", + "model.layers.59.mlp.gate.weight": "model-00048-of-00051.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", "model.layers.59.self_attn.k_norm.weight": "model-00048-of-00051.safetensors", "model.layers.59.self_attn.k_proj.qweight": "model-00048-of-00051.safetensors", @@ -83151,9 +83039,7 @@ "model.layers.6.mlp.experts.99.up_proj.qweight": "model-00006-of-00051.safetensors", "model.layers.6.mlp.experts.99.up_proj.qzeros": "model-00006-of-00051.safetensors", "model.layers.6.mlp.experts.99.up_proj.scales": "model-00006-of-00051.safetensors", - "model.layers.6.mlp.gate.qweight": "model-00006-of-00051.safetensors", - "model.layers.6.mlp.gate.qzeros": "model-00006-of-00051.safetensors", - "model.layers.6.mlp.gate.scales": "model-00006-of-00051.safetensors", + "model.layers.6.mlp.gate.weight": "model-00006-of-00051.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", "model.layers.6.self_attn.k_norm.weight": "model-00006-of-00051.safetensors", "model.layers.6.self_attn.k_proj.qweight": "model-00006-of-00051.safetensors", @@ -84610,9 +84496,7 @@ "model.layers.60.mlp.experts.99.up_proj.qweight": "model-00050-of-00051.safetensors", "model.layers.60.mlp.experts.99.up_proj.qzeros": "model-00050-of-00051.safetensors", "model.layers.60.mlp.experts.99.up_proj.scales": "model-00050-of-00051.safetensors", - "model.layers.60.mlp.gate.qweight": "model-00049-of-00051.safetensors", - "model.layers.60.mlp.gate.qzeros": "model-00049-of-00051.safetensors", - "model.layers.60.mlp.gate.scales": "model-00049-of-00051.safetensors", + "model.layers.60.mlp.gate.weight": "model-00049-of-00051.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", "model.layers.60.self_attn.k_norm.weight": "model-00049-of-00051.safetensors", "model.layers.60.self_attn.k_proj.qweight": "model-00049-of-00051.safetensors", @@ -86069,9 +85953,7 @@ "model.layers.61.mlp.experts.99.up_proj.qweight": "model-00050-of-00051.safetensors", "model.layers.61.mlp.experts.99.up_proj.qzeros": "model-00050-of-00051.safetensors", "model.layers.61.mlp.experts.99.up_proj.scales": "model-00050-of-00051.safetensors", - "model.layers.61.mlp.gate.qweight": "model-00050-of-00051.safetensors", - "model.layers.61.mlp.gate.qzeros": "model-00050-of-00051.safetensors", - "model.layers.61.mlp.gate.scales": "model-00050-of-00051.safetensors", + "model.layers.61.mlp.gate.weight": "model-00050-of-00051.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00051-of-00051.safetensors", "model.layers.61.self_attn.k_norm.weight": "model-00050-of-00051.safetensors", "model.layers.61.self_attn.k_proj.qweight": "model-00050-of-00051.safetensors", @@ -87528,9 +87410,7 @@ "model.layers.7.mlp.experts.99.up_proj.qweight": "model-00007-of-00051.safetensors", "model.layers.7.mlp.experts.99.up_proj.qzeros": "model-00007-of-00051.safetensors", "model.layers.7.mlp.experts.99.up_proj.scales": "model-00007-of-00051.safetensors", - "model.layers.7.mlp.gate.qweight": "model-00007-of-00051.safetensors", - "model.layers.7.mlp.gate.qzeros": "model-00007-of-00051.safetensors", - "model.layers.7.mlp.gate.scales": "model-00007-of-00051.safetensors", + "model.layers.7.mlp.gate.weight": "model-00007-of-00051.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", "model.layers.7.self_attn.k_norm.weight": "model-00007-of-00051.safetensors", "model.layers.7.self_attn.k_proj.qweight": "model-00006-of-00051.safetensors", @@ -88987,9 +88867,7 @@ "model.layers.8.mlp.experts.99.up_proj.qweight": "model-00008-of-00051.safetensors", "model.layers.8.mlp.experts.99.up_proj.qzeros": "model-00008-of-00051.safetensors", "model.layers.8.mlp.experts.99.up_proj.scales": "model-00008-of-00051.safetensors", - "model.layers.8.mlp.gate.qweight": "model-00007-of-00051.safetensors", - "model.layers.8.mlp.gate.qzeros": "model-00007-of-00051.safetensors", - "model.layers.8.mlp.gate.scales": "model-00007-of-00051.safetensors", + "model.layers.8.mlp.gate.weight": "model-00007-of-00051.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00008-of-00051.safetensors", "model.layers.8.self_attn.k_norm.weight": "model-00007-of-00051.safetensors", "model.layers.8.self_attn.k_proj.qweight": "model-00007-of-00051.safetensors", @@ -90446,9 +90324,7 @@ "model.layers.9.mlp.experts.99.up_proj.qweight": "model-00009-of-00051.safetensors", "model.layers.9.mlp.experts.99.up_proj.qzeros": "model-00009-of-00051.safetensors", "model.layers.9.mlp.experts.99.up_proj.scales": "model-00009-of-00051.safetensors", - "model.layers.9.mlp.gate.qweight": "model-00008-of-00051.safetensors", - "model.layers.9.mlp.gate.qzeros": "model-00008-of-00051.safetensors", - "model.layers.9.mlp.gate.scales": "model-00008-of-00051.safetensors", + "model.layers.9.mlp.gate.weight": "model-00008-of-00051.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", "model.layers.9.self_attn.k_norm.weight": "model-00008-of-00051.safetensors", "model.layers.9.self_attn.k_proj.qweight": "model-00008-of-00051.safetensors", diff --git a/quantization_config.json b/quantization_config.json index 9fe625551ea1b6da7efc9e411a340a4291e01136..ac3dab86089fcfc0b3c1854423f0529f2ce26210 100644 --- a/quantization_config.json +++ b/quantization_config.json @@ -4,8 +4,197 @@ "sym": true, "data_type": "int", "nsamples": 512, + "low_gpu_mem_usage": true, "dataset": "github-code-clean", - "autoround_version": "0.6.1", + "autoround_version": "0.6.1.dev", "quant_method": "auto-round", - "packing_format": "auto_round:auto_gptq" + "packing_format": "auto_round:auto_gptq", + "extra_config": { + "model.layers.0.mlp.gate": { + "bits": 16 + }, + "model.layers.1.mlp.gate": { + "bits": 16 + }, + "model.layers.2.mlp.gate": { + "bits": 16 + }, + "model.layers.3.mlp.gate": { + "bits": 16 + }, + "model.layers.4.mlp.gate": { + "bits": 16 + }, + "model.layers.5.mlp.gate": { + "bits": 16 + }, + "model.layers.6.mlp.gate": { + "bits": 16 + }, + "model.layers.7.mlp.gate": { + "bits": 16 + }, + "model.layers.8.mlp.gate": { + "bits": 16 + }, + "model.layers.9.mlp.gate": { + "bits": 16 + }, + "model.layers.10.mlp.gate": { + "bits": 16 + }, + "model.layers.11.mlp.gate": { + "bits": 16 + }, + "model.layers.12.mlp.gate": { + "bits": 16 + }, + "model.layers.13.mlp.gate": { + "bits": 16 + }, + "model.layers.14.mlp.gate": { + "bits": 16 + }, + "model.layers.15.mlp.gate": { + "bits": 16 + }, + "model.layers.16.mlp.gate": { + "bits": 16 + }, + "model.layers.17.mlp.gate": { + "bits": 16 + }, + "model.layers.18.mlp.gate": { + "bits": 16 + }, + "model.layers.19.mlp.gate": { + "bits": 16 + }, + "model.layers.20.mlp.gate": { + "bits": 16 + }, + "model.layers.21.mlp.gate": { + "bits": 16 + }, + "model.layers.22.mlp.gate": { + "bits": 16 + }, + "model.layers.23.mlp.gate": { + "bits": 16 + }, + "model.layers.24.mlp.gate": { + "bits": 16 + }, + "model.layers.25.mlp.gate": { + "bits": 16 + }, + "model.layers.26.mlp.gate": { + "bits": 16 + }, + "model.layers.27.mlp.gate": { + "bits": 16 + }, + "model.layers.28.mlp.gate": { + "bits": 16 + }, + "model.layers.29.mlp.gate": { + "bits": 16 + }, + "model.layers.30.mlp.gate": { + "bits": 16 + }, + "model.layers.31.mlp.gate": { + "bits": 16 + }, + "model.layers.32.mlp.gate": { + "bits": 16 + }, + "model.layers.33.mlp.gate": { + "bits": 16 + }, + "model.layers.34.mlp.gate": { + "bits": 16 + }, + "model.layers.35.mlp.gate": { + "bits": 16 + }, + "model.layers.36.mlp.gate": { + "bits": 16 + }, + "model.layers.37.mlp.gate": { + "bits": 16 + }, + "model.layers.38.mlp.gate": { + "bits": 16 + }, + "model.layers.39.mlp.gate": { + "bits": 16 + }, + "model.layers.40.mlp.gate": { + "bits": 16 + }, + "model.layers.41.mlp.gate": { + "bits": 16 + }, + "model.layers.42.mlp.gate": { + "bits": 16 + }, + "model.layers.43.mlp.gate": { + "bits": 16 + }, + "model.layers.44.mlp.gate": { + "bits": 16 + }, + "model.layers.45.mlp.gate": { + "bits": 16 + }, + "model.layers.46.mlp.gate": { + "bits": 16 + }, + "model.layers.47.mlp.gate": { + "bits": 16 + }, + "model.layers.48.mlp.gate": { + "bits": 16 + }, + "model.layers.49.mlp.gate": { + "bits": 16 + }, + "model.layers.50.mlp.gate": { + "bits": 16 + }, + "model.layers.51.mlp.gate": { + "bits": 16 + }, + "model.layers.52.mlp.gate": { + "bits": 16 + }, + "model.layers.53.mlp.gate": { + "bits": 16 + }, + "model.layers.54.mlp.gate": { + "bits": 16 + }, + "model.layers.55.mlp.gate": { + "bits": 16 + }, + "model.layers.56.mlp.gate": { + "bits": 16 + }, + "model.layers.57.mlp.gate": { + "bits": 16 + }, + "model.layers.58.mlp.gate": { + "bits": 16 + }, + "model.layers.59.mlp.gate": { + "bits": 16 + }, + "model.layers.60.mlp.gate": { + "bits": 16 + }, + "model.layers.61.mlp.gate": { + "bits": 16 + } + } } \ No newline at end of file