diff --git a/README.md b/README.md index 18e0d585e6b5795bbbc1d52b358116a7e0ca09e1..cab565eac75abf7b89cf0885ec314897a399e776 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ ---- -license: mit -pipeline_tag: text-generation -base_model: deepseek-ai/DeepSeek-V3-0324 -library_name: mlx -tags: -- mlx ---- - -# mlx-community/DeepSeek-V3-0324-4bit - -This model [mlx-community/DeepSeek-V3-0324-4bit](https://huggingface.co/mlx-community/DeepSeek-V3-0324-4bit) was -converted to MLX format from [deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324) +--- +license: mit +library_name: mlx +pipeline_tag: text-generation +base_model: deepseek-ai/DeepSeek-v3-0324 +tags: +- mlx +--- + +# mlx-community/DeepSeek-v3-0324-4bit + +This model [mlx-community/DeepSeek-v3-0324-4bit](https://huggingface.co/mlx-community/DeepSeek-v3-0324-4bit) was +converted to MLX format from [deepseek-ai/DeepSeek-v3-0324](https://huggingface.co/deepseek-ai/DeepSeek-v3-0324) using mlx-lm version **0.22.2**. ## Use with mlx @@ -22,7 +22,7 @@ pip install mlx-lm ```python from mlx_lm import load, generate -model, tokenizer = load("mlx-community/DeepSeek-V3-0324-4bit") +model, tokenizer = load("mlx-community/DeepSeek-v3-0324-4bit") prompt = "hello" diff --git a/config.json b/config.json index c7e51453b5a9ed6ecbd23e83672e2bb02f740415..7b1cdfc2bd3a4fabcb4a15620904d2d16c448679 100644 --- a/config.json +++ b/config.json @@ -9,7 +9,6 @@ "AutoModel": "modeling_deepseek.DeepseekV3Model", "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM" }, - "aux_loss_alpha": 0.001, "bos_token_id": 0, "eos_token_id": 1, "ep_size": 1, @@ -32,7 +31,6 @@ "num_hidden_layers": 61, "num_key_value_heads": 128, "num_nextn_predict_layers": 1, - "pretraining_tp": 1, "q_lora_rank": 1536, "qk_nope_head_dim": 128, "qk_rope_head_dim": 64, @@ -57,7 +55,6 @@ "rope_theta": 10000, "routed_scaling_factor": 2.5, "scoring_func": "sigmoid", - "seq_aux": true, "tie_word_embeddings": false, "topk_group": 4, "topk_method": "noaux_tc", diff --git a/configuration_deepseek.py b/configuration_deepseek.py index f2a42479fd055c4fe236178953965f6353d16b7f..f549f2b17d9a20fef5c26cb7a3977fafecc9fe79 100644 --- a/configuration_deepseek.py +++ b/configuration_deepseek.py @@ -82,11 +82,6 @@ class DeepseekV3Config(PretrainedConfig): Beginning of stream token id. eos_token_id (`int`, *optional*, defaults to 2): End of stream token id. - pretraining_tp (`int`, *optional*, defaults to 1): - Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this - document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is - necessary to ensure exact reproducibility of the pretraining results. Please refer to [this - issue](https://github.com/pytorch/pytorch/issues/76232). tie_word_embeddings (`bool`, *optional*, defaults to `False`): Whether to tie weight embeddings rope_theta (`float`, *optional*, defaults to 10000.0): @@ -141,8 +136,6 @@ class DeepseekV3Config(PretrainedConfig): first_k_dense_replace = 3, norm_topk_prob = True, scoring_func = 'sigmoid', - aux_loss_alpha = 0.001, - seq_aux = True, hidden_act="silu", max_position_embeddings=4096, initializer_range=0.02, @@ -151,7 +144,6 @@ class DeepseekV3Config(PretrainedConfig): pad_token_id=None, bos_token_id=0, eos_token_id=1, - pretraining_tp=1, tie_word_embeddings=False, rope_theta=10000.0, rope_scaling=None, @@ -184,8 +176,6 @@ class DeepseekV3Config(PretrainedConfig): self.first_k_dense_replace = first_k_dense_replace self.norm_topk_prob = norm_topk_prob self.scoring_func = scoring_func - self.aux_loss_alpha = aux_loss_alpha - self.seq_aux = seq_aux # for backward compatibility if num_key_value_heads is None: num_key_value_heads = num_attention_heads @@ -194,7 +184,6 @@ class DeepseekV3Config(PretrainedConfig): self.hidden_act = hidden_act self.initializer_range = initializer_range self.rms_norm_eps = rms_norm_eps - self.pretraining_tp = pretraining_tp self.use_cache = use_cache self.rope_theta = rope_theta self.rope_scaling = rope_scaling diff --git a/model-00001-of-00088.safetensors b/model-00001-of-00088.safetensors index a0925a356740ee6aab8faf84d47674698f97e222..2c401c5c67dce725e1906fe88e59eee7deaa0be1 100644 --- a/model-00001-of-00088.safetensors +++ b/model-00001-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:420cefc97a209bc7dca275dd0323033580936bae333f040de09259319ee3d6d2 +oid sha256:2c4c331915574139c5ac8dafee748d8fcb3e4c6a960824b98110d9268568cdd2 size 3725148265 diff --git a/model-00002-of-00088.safetensors b/model-00002-of-00088.safetensors index 07d1957f78c19c1c6bae6f635b15409763f5428f..b0e1d9c02db2cfc90a1209ac75a81969a3a7729e 100644 --- a/model-00002-of-00088.safetensors +++ b/model-00002-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25d56c3409710f0df7596a5e8b084224eeec1a567c5ac87c2889eaef15d78751 -size 4361585379 +oid sha256:4c5255f35ff5eaab3c5b6d5e1d3c184731d66b22b266caaa54b036c8a8d52384 +size 4361585891 diff --git a/model-00003-of-00088.safetensors b/model-00003-of-00088.safetensors index c28517cc24c4b1f3e269ec0e1f6f9cbe711ffb9e..b24e3926528babc2fafdba03a7768bd1a54e8fbc 100644 --- a/model-00003-of-00088.safetensors +++ b/model-00003-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cea11170512349c000d6922c83cbcc59c4735a801ee1c51c2f55c22f5af23c9 +oid sha256:3d5a6afc3553763edb09b71c8d5a44ec6075f48a6735d9e396d00b05e2f41e2c size 4227859218 diff --git a/model-00004-of-00088.safetensors b/model-00004-of-00088.safetensors index 6218e38dfad7df2aa75f5b42ffc2b6773a4df575..fa26ab0d81d751d1924c0ca224ba55a03208abcc 100644 --- a/model-00004-of-00088.safetensors +++ b/model-00004-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee031183293964e596ff4ffcae51847f9a1f8121bca7b03347eab77bebe8d7c0 -size 4361585451 +oid sha256:3cdf53f479116c1171638c721025685020b9548c362af7583689aa4ad69529bb +size 4361585963 diff --git a/model-00005-of-00088.safetensors b/model-00005-of-00088.safetensors index 002a55578deb1ab5e258a1027b184ac19339c514..07f70cf9b06b74cad47405eda4651079bf64b221 100644 --- a/model-00005-of-00088.safetensors +++ b/model-00005-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c43bc467be082473d871d806baf8a1e1d866e3a5ec7bb032945047e84c0ec36 -size 4361585389 +oid sha256:3ae6277488338917c5a464d5e8fd5c7c4dbfbe723a62922df203686a43fd7887 +size 4361585901 diff --git a/model-00006-of-00088.safetensors b/model-00006-of-00088.safetensors index 18ad27f3061b2060e7707d090edb5f6645666a3d..2a52f5bfe646e182369d7939fa6e23b6a8356b81 100644 --- a/model-00006-of-00088.safetensors +++ b/model-00006-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47e3a1a828af5394fd488e47c7e4c61e7bb101bcce14648018ee650bc1e3aaa9 +oid sha256:5825746fe2bc0f8f810efb39ced8a0208d212e95cbf884b8236570ed25cadac6 size 4227859214 diff --git a/model-00007-of-00088.safetensors b/model-00007-of-00088.safetensors index df7cef95e6440bc25df3c80a98af074b9937003b..5fbc9483c78ff26eafa50cc1e5fb577212baf942 100644 --- a/model-00007-of-00088.safetensors +++ b/model-00007-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fec87142a75484037bb6892ccaaafe9d94d20dddb78404d5af19e0c0dc970c3 -size 4361585449 +oid sha256:489f16102289544ad7016b6e13e908b96969212f545402c9948af61ab3292e2a +size 4361585961 diff --git a/model-00008-of-00088.safetensors b/model-00008-of-00088.safetensors index 164b169ad3a3ad41c2c437e6e952ebaff269dfe0..2bef018bee9510d7dce5d1177053a16b3a119308 100644 --- a/model-00008-of-00088.safetensors +++ b/model-00008-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10991df4887105b227de9a8a1061d8e23742c46399ad21122fcad4044c9bd1f8 -size 4361585357 +oid sha256:55d2b8c569b90aff3d07f13b5012616ac39d351ad5b981553e8bf93b7ed25897 +size 4361585869 diff --git a/model-00009-of-00088.safetensors b/model-00009-of-00088.safetensors index 4b7484d0b39f7978a5ae344e208f5247527f883a..59e8f6fb1a898c854f0da1d98c0baa0962e22a5f 100644 --- a/model-00009-of-00088.safetensors +++ b/model-00009-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:266eb4800b3c1770d17cdda249d32ebb59a53640cd5bd918859576c7aa7bdf8c +oid sha256:a88d1c2793b418c78946308827addf2baa9f5697259f855eda505e872b4885f6 size 4227859218 diff --git a/model-00010-of-00088.safetensors b/model-00010-of-00088.safetensors index c1f972b065cdd92ab8b322607596dd1411593e48..261780266be63469e44bf528a99ef4a45fd8c11b 100644 --- a/model-00010-of-00088.safetensors +++ b/model-00010-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a96f79d5049a4a12fe0d429798d6c5bfb7e2826ab978edef420df076e315bc4f -size 4361585375 +oid sha256:f6ff44864eda2d49217ca0779d7c08b1da1d1c02c035eb8d78e6114320b63f76 +size 4361585887 diff --git a/model-00011-of-00088.safetensors b/model-00011-of-00088.safetensors index 06205ef09a880a0422069e8a0c2c705efe98502f..e4bd182099a47310cabd64b25908c6c96656d43b 100644 --- a/model-00011-of-00088.safetensors +++ b/model-00011-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:080f4862600e2dbb3039560b6f09ad9df483d98dd6f872a1e1c1c5af34eef2d4 -size 4361585378 +oid sha256:ac186abf0822dece299c4d99ea8230d4a98eaa264def4314acc569e0c7fc4114 +size 4361585890 diff --git a/model-00012-of-00088.safetensors b/model-00012-of-00088.safetensors index 95ab2ce064f174b7946ca70475b6dea2f7a25089..464012e547bd048c11ba371e9106810e7dc7a9ee 100644 --- a/model-00012-of-00088.safetensors +++ b/model-00012-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59a5be6328390fb55ffa385bb963ee5de5f7b9414f65fe95ba2953dbf2d08d9a +oid sha256:4ad58c01b37e2d9334bdf11bf79e5df4cbad9a44c5550b374f5eda094d59e646 size 4227859224 diff --git a/model-00013-of-00088.safetensors b/model-00013-of-00088.safetensors index 25ce251612ca0a518a82385675194223e03f3b9f..6b2f2a2960be994a78fe171364003c9cb22cc09e 100644 --- a/model-00013-of-00088.safetensors +++ b/model-00013-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:441c68041831a3c63e7824d20cbae014b7a742572ab2cfd1bf10ad7a63a5e72e -size 4361585461 +oid sha256:35797d56d5691ff31584924971fd26a87e695b2079f16a7506916422223310a1 +size 4361585973 diff --git a/model-00014-of-00088.safetensors b/model-00014-of-00088.safetensors index feb6c6dbbbba3b24c89a53a34466a6ace42b6b76..4132deb6ccb39015748b7684a5befac68fc91fdc 100644 --- a/model-00014-of-00088.safetensors +++ b/model-00014-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0e5a4209228dff14ab65b23a9738f99acdb18e7fa4e4b50c1a0b87dd6a24c30 -size 4361585391 +oid sha256:fe571eab8db9f37c6ace29d62ae9c92bc419e7494e9373701d6b5751e9da85c0 +size 4361585903 diff --git a/model-00015-of-00088.safetensors b/model-00015-of-00088.safetensors index 3cff264feac08070539d82dd5d235db671761c5e..d29ec4305fdb65e67a3a6a3bb622f3c907d26fdf 100644 --- a/model-00015-of-00088.safetensors +++ b/model-00015-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae8a52bf69a23ed355c9580dca55a77f3f04538a8255794bfaef8b7d0ff96e58 +oid sha256:91866b8b087db695b6cc08ef96f6507f7b82e2228d2d095de9c6382bd7997318 size 4227859224 diff --git a/model-00016-of-00088.safetensors b/model-00016-of-00088.safetensors index a49c2381d581ab64d34d060835d22399b36a6e01..ec3efa10a5907ad33db479b927b787eadba523ec 100644 --- a/model-00016-of-00088.safetensors +++ b/model-00016-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab6181a065940f73c9bbeec2e9960735734ece3ec3f8a6d522b6c2e7337c435b -size 4361585455 +oid sha256:edb1becf27c675fdc9eeef2a4822b4f18c2d9b8dd1892d673ee95e391307bcfb +size 4361585967 diff --git a/model-00017-of-00088.safetensors b/model-00017-of-00088.safetensors index aded94038429c00f9c98e213e361858419b18539..c2d352a8f08a46b1efad71d0ed5426e92c3028ce 100644 --- a/model-00017-of-00088.safetensors +++ b/model-00017-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e0515fdd973feac7c3cb7ae6d777b8daed28ec872044bfbdbbc4a40a05c55f4 -size 4361585389 +oid sha256:47b49c2748913685fb5b64a916e58b4f0b2024467f63c5a509cf67a3aba14984 +size 4361585901 diff --git a/model-00018-of-00088.safetensors b/model-00018-of-00088.safetensors index bbadd0d7b79a8564e638e74adfd07d671a76f1a2..00f3f5be92bdb22f77e0447e27b9a66c6c0d5848 100644 --- a/model-00018-of-00088.safetensors +++ b/model-00018-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9719ce5848382650ae4a8aeb66632617b862ddceb921a5774522c650c7bc0412 +oid sha256:4a7f9a5c34b0dedf72a81cc344b1491fa00775971d1cd082f1aeb2a4bcdc87a2 size 4227859220 diff --git a/model-00019-of-00088.safetensors b/model-00019-of-00088.safetensors index c4388342f1ddc9dd514d21c993145a3547dc281b..8c2c1f58e80c82900052060251c730f13d701ced 100644 --- a/model-00019-of-00088.safetensors +++ b/model-00019-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5785a9d86fac01fc683c09b53f8b3eabd011e0c598cb15cdc4381e996d798b0a -size 4361585463 +oid sha256:f683e74041520ce95103eb786e957d957a71fc0274263e5f54f87daca3724f58 +size 4361585975 diff --git a/model-00020-of-00088.safetensors b/model-00020-of-00088.safetensors index c1d8e1bcaf1391c3bf5d4010ce8574e2f5794fbe..20099d0c948c2f92256b530bb8215bb9fe004edf 100644 --- a/model-00020-of-00088.safetensors +++ b/model-00020-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cca66fe3e628db32c083934efd30eeb29464628535bfea25d9e47ac1bece553d -size 4361585423 +oid sha256:ba991d74f35a7044d83af501fc5bbd1f50f2dd29c3b1a4727ef79d8e99ee414e +size 4361585935 diff --git a/model-00021-of-00088.safetensors b/model-00021-of-00088.safetensors index 34a58dd360dbcf44baa14f1743b88763c51b715a..d88c4c354c41927ac7cd85b1cc786b684f8f07ed 100644 --- a/model-00021-of-00088.safetensors +++ b/model-00021-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2894009b4a34c0cf3882c3d386486162f3713c2bb399fd1ed895d7ec27002c35 +oid sha256:5a05a3a2de06fc688c304f31ab3791f6301f1d160bf1d32c2a46decdadc03230 size 4227859224 diff --git a/model-00022-of-00088.safetensors b/model-00022-of-00088.safetensors index 20cc45f8035e637107c6f52cd2209b84a3797398..667ab16fe2fa94bb846e18b4ee2225433d8737f9 100644 --- a/model-00022-of-00088.safetensors +++ b/model-00022-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:380fb312d00798273edecae944448fe553128c3e938f997c53cc5c04c1c08e2a -size 4361585441 +oid sha256:95c49596dacbf68e4a09cf56f285be0809178c1dc92add3ca9c4deb678ac4e12 +size 4361585953 diff --git a/model-00023-of-00088.safetensors b/model-00023-of-00088.safetensors index 7180dac1342cbdb9db1b8c7e6589e0d2f06ad2a5..87eaa70c1cb5df70c53df4875d44bb5d0c2821e6 100644 --- a/model-00023-of-00088.safetensors +++ b/model-00023-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0fb77a4ba2977f0e23edbe7c24b3439ba7d2bd2adbd836905658f38037d745b5 -size 4361585399 +oid sha256:90d574126c1093a287046af5bc9ffb7eaff4209ec249f00a67327ff3352e144e +size 4361585911 diff --git a/model-00024-of-00088.safetensors b/model-00024-of-00088.safetensors index 010eddb51e322164907c913b30bc827da5d362cc..250dc4de687f8560d62d67a57bf3a46bb6c96a4b 100644 --- a/model-00024-of-00088.safetensors +++ b/model-00024-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a56cf785ef9cc09c0e2ef65b05d0232fcbfff351d46fce99e3c41a8ebce22652 +oid sha256:1be72c3ef27006fe760f0837c192a4ee7f262fdccd58704fa218c75cd798a6ff size 4227859226 diff --git a/model-00025-of-00088.safetensors b/model-00025-of-00088.safetensors index 68dadd50addfca6cbe6285aa343c766eef7006de..7ac4a9c42941bdc73fcba48a4c8d981d7d8e795f 100644 --- a/model-00025-of-00088.safetensors +++ b/model-00025-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95e9ec21964726e9d573bb61d7e9263d4b4ee53f2ee2c77d42621f75f2155cf1 -size 4361585485 +oid sha256:82a17d128c561f0e20ac0c749c7b285b7836eb18bd4afb9b9f2abeea47e049e4 +size 4361585997 diff --git a/model-00026-of-00088.safetensors b/model-00026-of-00088.safetensors index 1e328fe3a3122c104ac9c7b8ffe4e54c3ba37602..a9d4c201b76406a8c1ef0623ff20927fcb613f54 100644 --- a/model-00026-of-00088.safetensors +++ b/model-00026-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51453015b22b1f08bb7debc9ff9ebc2cc6be6cb4ccb46c7c73257a06abce16fb -size 4361585403 +oid sha256:df6f6c06927cf1d039883b27f45b43e9ed94ce9458152806d6582db541430f31 +size 4361585915 diff --git a/model-00027-of-00088.safetensors b/model-00027-of-00088.safetensors index e5ba6688c2bb28dc7f8c344caff9fef3b4e913b0..f8e6ef619a9a4cf5d14ac5c774475b0f706f5872 100644 --- a/model-00027-of-00088.safetensors +++ b/model-00027-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddbff7e32eca4c4d17b5f1db7b6c469acdbf43be2107ab45840d68fc750a67e8 +oid sha256:bafa57068931b7351d31402ac953c421fc86a6dc46b9c6e7487826119e77110e size 4227859220 diff --git a/model-00028-of-00088.safetensors b/model-00028-of-00088.safetensors index cc12e08c024443913d29aba01493efa4f2031f98..31aca80181f79593ba5338a126e36289a83dd8bd 100644 --- a/model-00028-of-00088.safetensors +++ b/model-00028-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b65b05bba4495102a59ded3c52f5f5aded4e502dfafe56700e34b0ebf859f0d6 -size 4361585453 +oid sha256:c15bf7d53d50f173ddf4f747655159e646d1fd22919e42d072d593c79b1aa324 +size 4361585965 diff --git a/model-00029-of-00088.safetensors b/model-00029-of-00088.safetensors index 273be706d13c13979398145292da4bf81020393d..39b89cd79171ef582cf9506e8c1bce197454e890 100644 --- a/model-00029-of-00088.safetensors +++ b/model-00029-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e81f9a30da7ed8182741dd13736e34f905a96a6cb2fcd97c52bec63668f537b6 -size 4361585371 +oid sha256:63104e5b296aa062b6cf0aa4f4855c0d6d222795fe3221549e0b9bf8130b023f +size 4361585883 diff --git a/model-00030-of-00088.safetensors b/model-00030-of-00088.safetensors index 0d234f2c57950eb031c272d00c760d9f38e125ff..4b66025b3c2d0c0ecfc158168dd135e2dbc0042d 100644 --- a/model-00030-of-00088.safetensors +++ b/model-00030-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7facbce0cb6519947195268bd39489c53b9c9c1cb6b22d3ac2315e31d39247bd +oid sha256:ab19691ee0e93d869c3cf3fb0b6552a4e2e4da12e9c79981621d33d2c98d656e size 4227859222 diff --git a/model-00031-of-00088.safetensors b/model-00031-of-00088.safetensors index 943d1a489e33cab538bd7afbb9106c0682cde61d..13d4fa917e4785609ce5fc98d9f953c791c5a94f 100644 --- a/model-00031-of-00088.safetensors +++ b/model-00031-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76884c6e1cbf8ae2ec6abe1cc59f6db83e78e1e9a095eb64587c87731790b12b -size 4361585487 +oid sha256:e99810effc6e15260b83be47276f78f82ca59b518a161666816d3511ea4d0edc +size 4361585999 diff --git a/model-00032-of-00088.safetensors b/model-00032-of-00088.safetensors index e7ba74a89734350e0735c5f892219a7d1b0c4999..1e1e128b2d0aa166f7bd2d67c687e906f274c5f2 100644 --- a/model-00032-of-00088.safetensors +++ b/model-00032-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:324fc661a86eb6be2e704c2cc0ef37378ac453ddc53967c9d3ce61e50909c8ca -size 4361585433 +oid sha256:83ba49c6f166f8203d1613530c028005a64f6c9e806930fa405f04a72e775a3f +size 4361585945 diff --git a/model-00033-of-00088.safetensors b/model-00033-of-00088.safetensors index 9b5c7c1e21c4228bd4e0c6c638f8509dd54accb9..ceca502feda05a20062f26a241ff481fbb6272dd 100644 --- a/model-00033-of-00088.safetensors +++ b/model-00033-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3b8a9063ce5d06e4141193a98f206cd8419cfd51a84ede7ca4a756e9d896223 +oid sha256:706d7c0114fa0047413517321bc82eff23b88a2d29b578e43bbe95a2e6b289f2 size 4227859218 diff --git a/model-00034-of-00088.safetensors b/model-00034-of-00088.safetensors index 5865e0d4b6ed56a3e5a13fce7837adb2e7dc33cc..7122685edd000f2fdee9569c03d669239b398f36 100644 --- a/model-00034-of-00088.safetensors +++ b/model-00034-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c67d2eff1f740301904678fe5d766170db4d4c8d517e85cc6782a06a6be9c677 -size 4361585441 +oid sha256:17b1791bdb04db57b46dbf36770f339eaecaf60c12a8a9d882f5ba0e17ae180e +size 4361585953 diff --git a/model-00035-of-00088.safetensors b/model-00035-of-00088.safetensors index 7be4028a202cb69b58cc09f2df3d5a50dbe6995a..953067be0e7380d5a6cba6c3b41f71745c6942e7 100644 --- a/model-00035-of-00088.safetensors +++ b/model-00035-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5275be52f00466d3a7639e5b34a47b4e4897fa695c71f4c1c5f23e8ba46bb652 -size 4361585411 +oid sha256:1976a946fd9ca09f3f725cdc877e30a60940a5587a09c4a548a1182d3c5bb7c0 +size 4361585923 diff --git a/model-00036-of-00088.safetensors b/model-00036-of-00088.safetensors index 61acd621478641ffd9b54f6f567f404b1e60c691..dc376ace8283668f2cc585365d7bd3ced9ede4f9 100644 --- a/model-00036-of-00088.safetensors +++ b/model-00036-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9d557b4b7e21d0224cd1ea6d0ba9ab7ff6e5de139321dd3ad3583275579b3bf +oid sha256:38f3dcbfb878e7cfc3d9d8690448a3a634b38e7e66d047ca96f390a82194d028 size 4227859224 diff --git a/model-00037-of-00088.safetensors b/model-00037-of-00088.safetensors index 92ead9b2df3328b96cb8d81a6ca98e316a3ea7b9..45318f9517bffd485fb1b8f00387f374b953eb3d 100644 --- a/model-00037-of-00088.safetensors +++ b/model-00037-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4f3440118ae6f9f417d3bcbebba13f786752ddeec3ea781aa553fc94b9849bc -size 4361585487 +oid sha256:9d29e442541517efef4895cc8539e85444b714744bcf4adf5ddf3289367897ee +size 4361585999 diff --git a/model-00038-of-00088.safetensors b/model-00038-of-00088.safetensors index 5c108783ebf1e114ba2b662ed7582d4f2363e562..8393b941f072aecbfde77c0e98f62bc7387974dd 100644 --- a/model-00038-of-00088.safetensors +++ b/model-00038-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deace3371631e6468aca15af2feb48628ebee1c5990d394dcd94038fd06a798a -size 4361585399 +oid sha256:8a635ae0cc54dd30f6a1380910c37665f3deedae12b2740d32516a0cacf9d36d +size 4361585911 diff --git a/model-00039-of-00088.safetensors b/model-00039-of-00088.safetensors index b441d6afab2a8e9f734ce26b07112517955e1eaa..a2aafa0dcf2a7d2ac99062802f65d8586c82d278 100644 --- a/model-00039-of-00088.safetensors +++ b/model-00039-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aec3310aa068f5d30e3ee614b7a1d88e004a3bdd393789cb87e8ddef3f93f7c9 +oid sha256:dd60548afce53b3712eccc1031cf3a8f2d70f32c0491b83ad806fd9d71a627e9 size 4227859220 diff --git a/model-00040-of-00088.safetensors b/model-00040-of-00088.safetensors index b29463e29fc9a158ffb24e1f6f01769eda7d5d0a..a4697f7a644e1919b3df05f77bdd4b98e83148c6 100644 --- a/model-00040-of-00088.safetensors +++ b/model-00040-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b999fd7930a1b4b33abffca1a6ded0cdb1c2b3aa6c3eddffb8463e7240165df -size 4361585427 +oid sha256:3655ad3bb960bcf3b234d590f0b826a85808f237997b82f8b4044a97ba53af7a +size 4361585939 diff --git a/model-00041-of-00088.safetensors b/model-00041-of-00088.safetensors index a61bd461cae3f0567eacb47b302c92f66536d42d..7872a36da5d5fade261fe875856c33a51ed6056d 100644 --- a/model-00041-of-00088.safetensors +++ b/model-00041-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:712ac11db9b1ff9a33b2ff52ffbe33b292086afb14d014f016d8688ccaf7c708 -size 4361585389 +oid sha256:03811b0e7e10fae0f4bb1930f32856f313f1357cb17b26bef4c0dcfe86ec8784 +size 4361585901 diff --git a/model-00042-of-00088.safetensors b/model-00042-of-00088.safetensors index a443ac197cf17c2c981f45e6eb12599154c28202..712763f5bc870152c53338ced0d2f0fb316ea657 100644 --- a/model-00042-of-00088.safetensors +++ b/model-00042-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1704dcda42501339d011dc664757788229038fac892e76d53a41a52476872304 +oid sha256:f06f8d344ee0c926ac39a932ec8e1761cfe050216307e909f1d8e76a12faf329 size 4227859224 diff --git a/model-00043-of-00088.safetensors b/model-00043-of-00088.safetensors index 62ae091e7eb46c8cda7260c852640efbcad404df..77befc8d5f16f3536de078c5eee9ce150a4b9a4e 100644 --- a/model-00043-of-00088.safetensors +++ b/model-00043-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd1e386a0f57df0a9ee07fd4a2a6acb564dfa0a2fa2470212275c7e23d61b4cb -size 4361585485 +oid sha256:d2e03f3648d8a8efa1d8690be12fe44b7320f40f366db7513d281be100253831 +size 4361585997 diff --git a/model-00044-of-00088.safetensors b/model-00044-of-00088.safetensors index 7af5d9d25cffc0e08860e45f5cbdb4530a7e1e59..d79391b006c99cc8f7f339c42eb33563ce58b3ff 100644 --- a/model-00044-of-00088.safetensors +++ b/model-00044-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fd4888d92d61acadde651e92c2430d490f19be6921c0bd1e4fe9ccaec90294b -size 4361585425 +oid sha256:9fd06a3592f5a1829ed33de6fab42d3f186480557e007fb99b4f3cca91f4d076 +size 4361585937 diff --git a/model-00045-of-00088.safetensors b/model-00045-of-00088.safetensors index f446c113a108e6ed48327dbae17240a497f09032..31bdeb7e31eef4856fb5c051ad59c8ce5808a0e4 100644 --- a/model-00045-of-00088.safetensors +++ b/model-00045-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b18128aad03c99ca7ff4c968963c46eeb504b815b3a51c9b749e9f0a4f8304c +oid sha256:2fd10213eac1bbec48311c3046354be068e96e8f7e177db5692210a9aeae7214 size 4227859224 diff --git a/model-00046-of-00088.safetensors b/model-00046-of-00088.safetensors index 140f458b74a798d5818b7873a8baeea5c7520354..7fd48666179a134ed556fad099dcc27beb4f74f4 100644 --- a/model-00046-of-00088.safetensors +++ b/model-00046-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0817a48bef8ed600c641471784f7edbc63b83f42af1a2c6480febe5b4daa2663 -size 4361585485 +oid sha256:2ff6373a0b3b0b717fa7d0f3fa0a1dbbe7649b8815e928d8a986c488e70bb7e0 +size 4361585997 diff --git a/model-00047-of-00088.safetensors b/model-00047-of-00088.safetensors index 8e3e8f771d3ac20ecea92aeaf837e130f7bfa9d3..54192a5fc4edf4067efb5222f4af8f2dee80b4ba 100644 --- a/model-00047-of-00088.safetensors +++ b/model-00047-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d47e9ad4964efed79547703edfe3af61f76e6bef03aab0f72ad9af9716f01af9 -size 4361585385 +oid sha256:2d1f446ada567bbd8e529c4d13f9e630ae30dcad69b8ff49b11d0aa21346e47e +size 4361585897 diff --git a/model-00048-of-00088.safetensors b/model-00048-of-00088.safetensors index c5c7e996156a7b2b2935dc01c3831599287f9731..e1f01b8b43c5f5060cd61ce2326e7de87c412912 100644 --- a/model-00048-of-00088.safetensors +++ b/model-00048-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6438725ce77302d7f9d7b6592cda80c5dc58f883235d06af37add451e57a68b +oid sha256:7a6884b67d9e57cd232b9190e2d53b3294bcba450feddc1529babf06682548c8 size 4227859224 diff --git a/model-00049-of-00088.safetensors b/model-00049-of-00088.safetensors index 7d52fd5a8a65af665dcab132f20bea4b750580fd..4c270a092f64a6e9c5401b4575a6351c676c5362 100644 --- a/model-00049-of-00088.safetensors +++ b/model-00049-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:472e60a1cc389d86cc62537d49be8a53a5b944c6b257c00cc1f2b2831252cc28 -size 4361585429 +oid sha256:15b150c9460d74003edf5e7cbf019df506ba2c1b0842d54b40dcfffe2ee615b8 +size 4361585941 diff --git a/model-00050-of-00088.safetensors b/model-00050-of-00088.safetensors index cc1862389fe5a83f8c1b42733583ecae2f0669b3..19a0322d848b02f6eaadf3bbb5ad585b2f28c671 100644 --- a/model-00050-of-00088.safetensors +++ b/model-00050-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20ad8c561853f3b3652f03571a69a6f3ded2d84cfdb8fa4f6a58cca4aa41d1c5 -size 4361585421 +oid sha256:0488db905dd32871459b02f996c464d6f84cc112dc6860c48c6e0284131bb38e +size 4361585933 diff --git a/model-00051-of-00088.safetensors b/model-00051-of-00088.safetensors index 2638c77bc2c6d024821cc21e607fd456abe93240..3a52fbd30956be5ee24450887604f60ebad9717d 100644 --- a/model-00051-of-00088.safetensors +++ b/model-00051-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76debf85cef3914a1c978d47395411036342f0a1d0c6214a69d1689474f562f4 +oid sha256:e24dd1cc68aa76580c9aa9aa3aa642625a7b7cdf9391867a109f1148f03d8eb2 size 4227859226 diff --git a/model-00052-of-00088.safetensors b/model-00052-of-00088.safetensors index 01dce285c106d41999e85285e400a4ef53e0f319..bd7cc661f57ebfa9a62a56bbcf967d8961a11def 100644 --- a/model-00052-of-00088.safetensors +++ b/model-00052-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b297fef83a324acc63260d311554456477e70f2d2cc27a831a30ddd4bff9a21b -size 4361585487 +oid sha256:1cb01f221f0c1de746fecd43051df894cc0390efde89dedc2dcbaf64809f39c9 +size 4361585999 diff --git a/model-00053-of-00088.safetensors b/model-00053-of-00088.safetensors index 27cbdbcd58ad061138ef386783bdbe8d4bc2e834..131ff9d54396494a9d0ffed72f09340b5d09e741 100644 --- a/model-00053-of-00088.safetensors +++ b/model-00053-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:123b6b0ad9eebc02aad01a73cb396e3e7cdd892bf693b05a792ed38bf6044991 -size 4361585395 +oid sha256:57716bba17157ea915c14d25e6a55728eb693000d71c349742ae588a4e440443 +size 4361585907 diff --git a/model-00054-of-00088.safetensors b/model-00054-of-00088.safetensors index a6e6e1e879761b375fe352cda736b44f64a186f2..7d3fd7848be3fdf3552e13d918119fb0812a3c70 100644 --- a/model-00054-of-00088.safetensors +++ b/model-00054-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:876355d19361b5eae1b9cdfbf32f38405686e59959f9e81db1676cfe249d43db +oid sha256:58e369635f934bc2749f8f3bb152a830fef1986afee0fd8ed2ee772f15705098 size 4227859222 diff --git a/model-00055-of-00088.safetensors b/model-00055-of-00088.safetensors index 21196fecad1fa5b89471ab36144283cfa35c1638..1b0091a3d890c2cc0bed5adce0041871e8d275f6 100644 --- a/model-00055-of-00088.safetensors +++ b/model-00055-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b36aa53e7e2a6b5b91b8a5d0f7cb8f6082a3411b98ac2e8f6992fc76e8880ed -size 4361585441 +oid sha256:f1c3a8b134ec105ee3bb248d3620cd9547704f99e2e4b7f2aeed5dfc4b518121 +size 4361585953 diff --git a/model-00056-of-00088.safetensors b/model-00056-of-00088.safetensors index 1f9ee2efe9fdcb2b47aa8082bfdb888a6bd68838..c98f12895e28a2c6a173f3a06c2d3a8e317dfb76 100644 --- a/model-00056-of-00088.safetensors +++ b/model-00056-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c5914f79e857e1ff859ab058b2911aa1c0c67772b44bf74fb81d944b14f4cf0 -size 4361585401 +oid sha256:7418c54295472a995440a1127834a2ae785f47e8be6c2ca30f49527acf8a718e +size 4361585913 diff --git a/model-00057-of-00088.safetensors b/model-00057-of-00088.safetensors index 4726ba74685e70b0ad773fdebe79e59c3c700663..0d89027288b800e27359260917e5c80dbb6eea08 100644 --- a/model-00057-of-00088.safetensors +++ b/model-00057-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c3ee481f234df07d1346f7bcf2868e7cfdb21a02a31716b9835a25857e0f8b8 +oid sha256:abb0960fbd677b804b52efc656f63178b73f9310412d7869998d9914f7126031 size 4227859224 diff --git a/model-00058-of-00088.safetensors b/model-00058-of-00088.safetensors index 951370060ea9252cf16dd6e15d5f47156b1b1b94..39356005dfbf50585c3dba84cac20c1d34ab5b66 100644 --- a/model-00058-of-00088.safetensors +++ b/model-00058-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96c951b365e79b6dd2c4b953ab90a1c33b885ccffdae2dbb2b3d2dd7f57b9f24 -size 4361585487 +oid sha256:c94b8fb1a7757848744888fc1461052d0b28a564361d17e18375530765e95978 +size 4361585999 diff --git a/model-00059-of-00088.safetensors b/model-00059-of-00088.safetensors index 3fb006d1776e118efc6f7ff4a33c92c2e6c59a46..36f7b8de5a4c5ca0f98f0a9dd5b5ceb036a4529e 100644 --- a/model-00059-of-00088.safetensors +++ b/model-00059-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3b80759e60875abe57cdb383c51ac058a619431e0a6b2726731ff00e083d3ce -size 4361585419 +oid sha256:add8c031e62317b9dd463d222e0913b36f8ced8c70c48c53929b0c039875b046 +size 4361585931 diff --git a/model-00060-of-00088.safetensors b/model-00060-of-00088.safetensors index 205147a5593ed25451890c1e8b025923adc8ed3c..e94c5bbc9a6382101a1b0358064494841c968441 100644 --- a/model-00060-of-00088.safetensors +++ b/model-00060-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e4d741f272be962106597f0a1ccafee9d6f0359ee1b3e94b1130d8e7b5d6bd4 +oid sha256:5a728d924d78764f9258fba8a032ce0464e230c4f6edfe5f9af6e730c765b57e size 4227859224 diff --git a/model-00061-of-00088.safetensors b/model-00061-of-00088.safetensors index 7d120fd1c339b2b13574f744507cb702da8802ac..0a58025cff04f9e6de27395b2da29aa38171490b 100644 --- a/model-00061-of-00088.safetensors +++ b/model-00061-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a09d3a30f16d98c7cce6f60744c27c03170e3dd9ef2230ab77318f592d1a32c0 -size 4361585419 +oid sha256:c67dcee2f400fd0839df113a43938b60af8ac2f25a296238bb0502a918034f2b +size 4361585931 diff --git a/model-00062-of-00088.safetensors b/model-00062-of-00088.safetensors index d90efeb4d9e200f6f3d1fa6922815254d0daae95..dcdfaa771b3581e88a7537cd8b46b0aa696e8ee7 100644 --- a/model-00062-of-00088.safetensors +++ b/model-00062-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9bd3d757a3507efcf8c8e2ec6deaa394c8fd551e3230267b19dd04f74387a52 -size 4361585397 +oid sha256:bd042a4a3c72a7348b8baaac84ce4a9661d32f8ac13c09e650cb8ce6d66be8a3 +size 4361585909 diff --git a/model-00063-of-00088.safetensors b/model-00063-of-00088.safetensors index da76159fdd97bf84e3f89bad2ae55b5ded490ff8..560bb75e633018ed20fd10ceca4fa3b5677213fd 100644 --- a/model-00063-of-00088.safetensors +++ b/model-00063-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:732f82013f90cda54649b0ff6e11c50b8d1116c8f5d1cf6963303e63c0080d91 +oid sha256:3239ed2573be4b221ee90b362ee4465560a1f34d8a77f3cc805c0a44ad50517e size 4227859224 diff --git a/model-00064-of-00088.safetensors b/model-00064-of-00088.safetensors index e2246dc013ab28958c9510fa935d7a8755969208..d5f7b29b1ba9e8b7b00a5684f1bdad2de0bd5489 100644 --- a/model-00064-of-00088.safetensors +++ b/model-00064-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:232393fb27b02abdfa0194bf0eb47b7c2cd3ff012a8793e93e642f7a76660714 -size 4361585485 +oid sha256:89dabe495fa8b5083aba6b90cd937e79eed0a070d2891ac4123442b13b993ee0 +size 4361585997 diff --git a/model-00065-of-00088.safetensors b/model-00065-of-00088.safetensors index 7319566ab2e0a91b0f688de687803d0fdd643ba5..b5a6bb47651e005362f3fe939668267a764c1e31 100644 --- a/model-00065-of-00088.safetensors +++ b/model-00065-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ea3ac9588d86d04d8deb852f7fbe8bf424c0948be8b2b795b0a9759e60cc408 -size 4361585383 +oid sha256:8cb49a1555c6bb2b3608d646c6c24be2af60dc4d2a55a174341d833ab085a0f4 +size 4361585895 diff --git a/model-00066-of-00088.safetensors b/model-00066-of-00088.safetensors index 0613c87971abdc178ed1dd6dfac46f504a215845..295226487670727b63d4c2d534143ba68940ac73 100644 --- a/model-00066-of-00088.safetensors +++ b/model-00066-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:732ccd88e78f1aa3720379e2963bbcf279e718990f140d934a3c3bc0466cc29b +oid sha256:5775d16c7099d089f8e619c18ae02ea438ee2a14f6998559197c2c18cd5fa0b2 size 4227859218 diff --git a/model-00067-of-00088.safetensors b/model-00067-of-00088.safetensors index 1b1facbac2c0647c06b346dd29c045ad26c90c39..7dd8766fa3ae7acb1bb93e633aaf3149220eeeaf 100644 --- a/model-00067-of-00088.safetensors +++ b/model-00067-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb71808368b6e1e64ffe7809639f3d0e868c43aa74f54d33743ad66c9ffb5f77 -size 4361585469 +oid sha256:482ba2113acf9a3b3a86eb0ffe647609d6e95ca24a36cef837b8963517d6db9b +size 4361585981 diff --git a/model-00068-of-00088.safetensors b/model-00068-of-00088.safetensors index 8a46424146b503c04f70c0654d60799c39062d7b..ba73c69139f50a0dfd53db96d7fd80b6d612e8bf 100644 --- a/model-00068-of-00088.safetensors +++ b/model-00068-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c2351a7175d04b8f113f31ef1f99dd5464db0069a1eeb95a65850bedf7730c8 -size 4361585375 +oid sha256:0f9769317b5aa83c723481ab682127a91206930eefa12d2751a149e8a2bff21a +size 4361585887 diff --git a/model-00069-of-00088.safetensors b/model-00069-of-00088.safetensors index d7259bae63d96d311f4ba5887b6b9ed62619d360..add6d0a8f3cb1c251473d906046d5724e0fdcfc3 100644 --- a/model-00069-of-00088.safetensors +++ b/model-00069-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc6df1caa4dda9ab9587192c2fea1f20cb2aef3990c7ffef40af3fea5aae8ad4 +oid sha256:95302d446b71b6c50955629e47afc16cd5edf88d473cfe28ec53863a78ea6b45 size 4227859224 diff --git a/model-00070-of-00088.safetensors b/model-00070-of-00088.safetensors index aa1147a19d455bffe45ba6978fe05e03234cd407..5c8a4e16c6fa05e1ee0b1649f72123005e1acfc4 100644 --- a/model-00070-of-00088.safetensors +++ b/model-00070-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53876914c8b928b32d03a0ecdf910b6dfea0f7dbf373e73f7c23d0189a7f270f -size 4361585487 +oid sha256:56eef325557810c269185c0f3de22ad89a11e99325cbba700ae7edd3a5de1c9e +size 4361585999 diff --git a/model-00071-of-00088.safetensors b/model-00071-of-00088.safetensors index 273409ce687192cbc2434c626eedaded60bcd8cb..b3fc14b2fa7b3f432f0983f439424b5ae80f1aca 100644 --- a/model-00071-of-00088.safetensors +++ b/model-00071-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b0c9765e498b2d095aeaa7aa3d6b50c5ef961a5950b7098f7855d3998e550b5 -size 4361585401 +oid sha256:a2234e5ed54e6d90d7a8e09321b0963eda2a505666d35edf435aa9647515e732 +size 4361585913 diff --git a/model-00072-of-00088.safetensors b/model-00072-of-00088.safetensors index adf6df5053ee0090fe16426fd53f31a4d845b8cc..0c1032f59808f3bde5da42eae68476e3c5732d0d 100644 --- a/model-00072-of-00088.safetensors +++ b/model-00072-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07a6a0900ca7ef6c3d5ca2e7e9687ac5ec84d2ea89b31665ae2e57fc2faf536e +oid sha256:97553be4c20a9301e5f2834dabda797938a502f6e5cce305d4b7a4b8ea7a780a size 4227859220 diff --git a/model-00073-of-00088.safetensors b/model-00073-of-00088.safetensors index e598c1c01f3d737e671b42ca1098cf05b8c66c61..fb8133b9e11f8a1980f1be195443b3adc93c7bee 100644 --- a/model-00073-of-00088.safetensors +++ b/model-00073-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ee7a004002f1aca9f106d8c780fe846d6a8a0ce6080398d0beac3175078a88a -size 4361585489 +oid sha256:814a161fe1223ed6e5e4137b9904d0d77c1fa635d550a83cd61e7ce74ed1e629 +size 4361586001 diff --git a/model-00074-of-00088.safetensors b/model-00074-of-00088.safetensors index f754aada1a831735da43dfaad921c598bd9dd524..e49914318817760f97c0c482cc00821fa7d380e0 100644 --- a/model-00074-of-00088.safetensors +++ b/model-00074-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ef6c4b72fe7af56271935a67f7a51f19e523aa03676e5ac074b4aeab6a31f96 -size 4361585377 +oid sha256:b8897cbd6f2b7a36ead5ebec47c7a43f4e287b681e800853f6374ed42c252938 +size 4361585889 diff --git a/model-00075-of-00088.safetensors b/model-00075-of-00088.safetensors index 5daab5923c6fd65cece18a650e90a70d62d3b64d..854263a13393b8c5b5714c735adb2ec60a022e82 100644 --- a/model-00075-of-00088.safetensors +++ b/model-00075-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7836661e8175379b01b9cca21dc6931476970b23842b9d18f83d99b5e37f7aa7 +oid sha256:7c10507ecea2d95f5d572b28abed7bfd0f233f2770b6e976488c06ab3b1c3464 size 4227859222 diff --git a/model-00076-of-00088.safetensors b/model-00076-of-00088.safetensors index 8c1b25dea9d365a28c9c5a5dac3432154d8f4e55..b0507765f1ec2bf688ecf170d4fc6c983867cd20 100644 --- a/model-00076-of-00088.safetensors +++ b/model-00076-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbd6bbe2312b75f0b2809c4972b2778221aceb3c4dbfe16275a7b6514c131e80 -size 4361585487 +oid sha256:a593577b71f247f36b2f2f03cab9a73b784e07cadf1be4c6f0d9fe3b2d64997c +size 4361585999 diff --git a/model-00077-of-00088.safetensors b/model-00077-of-00088.safetensors index ed25f00701a1819c3ef958da947e73b9fba5d009..c24ad687b588f505486c38d879b8e54664148a50 100644 --- a/model-00077-of-00088.safetensors +++ b/model-00077-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d528325f3bd4ab59707f962e1c75cde2f458132cee2aa1408ea1e0d75b2812af -size 4361585401 +oid sha256:6923b82f8fe0388cab283b95465d4ab254e8b7a38ae28777e3589b6b757c13c3 +size 4361585913 diff --git a/model-00078-of-00088.safetensors b/model-00078-of-00088.safetensors index 44184d5ed2aa234699a147be93d51945c8425509..caff79bf7b4b3d7c72e60ad2db457121c6b7ebe9 100644 --- a/model-00078-of-00088.safetensors +++ b/model-00078-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:430a1ed7dfc31315598284f63203969f4f2c8974da06e88ed318035c836d3f69 +oid sha256:25089d065918d5cb540ca15a593bf8777a1f3ae7df74aa9d151748bf4f7418d5 size 4227859224 diff --git a/model-00079-of-00088.safetensors b/model-00079-of-00088.safetensors index 61edbd8df33541d5163385163d0c2062cbee10d5..202c16f27b21627122a37ad2af95f2c5a9f3698a 100644 --- a/model-00079-of-00088.safetensors +++ b/model-00079-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fdbcc714447cd7850ff0b36d07321b475a07ff7a8ad8d8e9c8f31551a621f93 -size 4361585453 +oid sha256:79278727c838541bc4f43a62c9f92fed411f87c8b790cb91f93cb88ea39048a5 +size 4361585965 diff --git a/model-00080-of-00088.safetensors b/model-00080-of-00088.safetensors index cc9c281b640cbec309c5c0f995e9fa3fed275880..14c4d192144108f3e7a59bd3475f82283790a036 100644 --- a/model-00080-of-00088.safetensors +++ b/model-00080-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82878c1b598b13fbcff70a13a337c1af998cdad4124459a8c1d39c08d75ece93 -size 4361585409 +oid sha256:7a5b87baad867699bc628c95c54b89903ec73b8179dc2a71c8f9590a5d230366 +size 4361585921 diff --git a/model-00081-of-00088.safetensors b/model-00081-of-00088.safetensors index 1823c6b426bbbbeaf21f9b401ef83a00d9b51592..0fb15b3c5386738382cd4d8733754d43519fa3bc 100644 --- a/model-00081-of-00088.safetensors +++ b/model-00081-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75e6db8535a1b7135520e3a2d8875d334d81ac507a9991f8fe3d1e5f11c6fb0c +oid sha256:b67ab8217ab7245872b73efe99d0c78386d9f19112fc1867ae26947cd1776c61 size 4227859222 diff --git a/model-00082-of-00088.safetensors b/model-00082-of-00088.safetensors index 89b1a0cccba1aa212057d1c6a2171c645a37ac1c..7cad31037070454b7a293bfdd78171fe3c0bd58e 100644 --- a/model-00082-of-00088.safetensors +++ b/model-00082-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c92b3c88cf42227aabe3bfe82c3c1d8c5668eaa74c91ff7d495dfa4c76d31a8 -size 4361585487 +oid sha256:19380b3c1a5d1b0eb996f11738d93745939f177f4d7e265e0a3f848bd59e8e35 +size 4361585999 diff --git a/model-00083-of-00088.safetensors b/model-00083-of-00088.safetensors index 1e2c87820ad3bdec2b44046129b13aeaa830f7f7..138566f89b8655526d5491031800f7a9f5f3ad4e 100644 --- a/model-00083-of-00088.safetensors +++ b/model-00083-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99b8312dcdada8363f2e0b1c7dd348ade452a8052679a868d115462de92fc312 -size 4361585449 +oid sha256:4bb67576189bf55bce7aa885e8884426cf89c7e98092088393770b2ccc461c78 +size 4361585961 diff --git a/model-00084-of-00088.safetensors b/model-00084-of-00088.safetensors index e340ecc23a3fc6acf2db71b25fc5e8f591070c9d..590da04c265b84363a263c07c210353ce711e085 100644 --- a/model-00084-of-00088.safetensors +++ b/model-00084-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d02850ff516d057e1df103b98a385a7eeea3d37046c242c1d006ba6dfa6da67 +oid sha256:071e94d6bad83736ed3eb4e33ba206ee79a2e817e94017839d35cbdaf0d0d63a size 4227859224 diff --git a/model-00085-of-00088.safetensors b/model-00085-of-00088.safetensors index caf2fefbc707040727c7b2c7b1b50c05bc88aeae..d38212dc88c5783eed94c1d95f9c78bbbb095393 100644 --- a/model-00085-of-00088.safetensors +++ b/model-00085-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:829da09d7045af99657c614aa229edb136a1634c8aa8756abb00ba847cab15dc -size 4361585475 +oid sha256:6e3969b5f084f9ae4c6a9b9113064c61ac30bdc1de0bdd34a2c5f71f887cbc94 +size 4361585987 diff --git a/model-00086-of-00088.safetensors b/model-00086-of-00088.safetensors index 7a3242928960e627da10194b4689b861ad1cc54a..36385efdacfd5834285634012b26c13518a8c3ff 100644 --- a/model-00086-of-00088.safetensors +++ b/model-00086-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e983f1178a6baff106ea598fd9f6fa86c9275e9e4f896616e39d7d7f7fde8d0c -size 4361585409 +oid sha256:555187c7167906389f66da67ff433010372011820bd2f19ac9a19b7210f9b609 +size 4361585921 diff --git a/model-00087-of-00088.safetensors b/model-00087-of-00088.safetensors index 34da70ae4c56a386c68b8d8a03bc8b329712ca7e..67efde42d5c7cff60187c975277d4eaa9c90bbaa 100644 --- a/model-00087-of-00088.safetensors +++ b/model-00087-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:533ee7f00f13b1198b092e84e4755065f4abbde3b9666cf76584d47aa4eb8378 +oid sha256:41558b3a66e3e28bb194e0f86bf8add94d9e55963a20647e94c27a7f090b35d9 size 4227859222 diff --git a/model-00088-of-00088.safetensors b/model-00088-of-00088.safetensors index 3d11cb87bd6e40f1b03d0fd4c5de5db1bfd49b4d..2dd54ce66fffb5b0bd08060e3e7b6082726bdc05 100644 --- a/model-00088-of-00088.safetensors +++ b/model-00088-of-00088.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d2590bb0b715b7f4a62fc7210aa38536893370464c36e65b71f7155e5da7a81 -size 2663674656 +oid sha256:6ddea7b1ed8292571bc21a10baf722977ddce464b1738375a5729a2b2d37194b +size 2663675168 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index caeefb01ade273f54c36f96da4d0d5ddb9d5addf..7945fcc82836ee5114a5e2e1b5d497fa9f5884f8 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,6 +1,6 @@ { "metadata": { - "total_size": 377606822912 + "total_size": 377606852608 }, "weight_map": { "lm_head.biases": "model-00088-of-00088.safetensors", diff --git a/modeling_deepseek.py b/modeling_deepseek.py index 1192a0063d01688f10fb33ba9abb3e0017e6bb71..28d9ea27aa530726b99c1ec32abc3c568de1f012 100644 --- a/modeling_deepseek.py +++ b/modeling_deepseek.py @@ -398,7 +398,6 @@ class MoEGate(nn.Module): self.n_routed_experts = config.n_routed_experts self.routed_scaling_factor = config.routed_scaling_factor self.scoring_func = config.scoring_func - self.seq_aux = config.seq_aux self.topk_method = config.topk_method self.n_group = config.n_group self.topk_group = config.topk_group @@ -455,7 +454,7 @@ class MoEGate(nn.Module): ) .reshape(bsz * seq_len, -1) ) # [n, e] - tmp_scores = scores_for_choice.masked_fill(~score_mask.bool(), 0.0) # [n, e] + tmp_scores = scores_for_choice.masked_fill(~score_mask.bool(), float("-inf")) # [n, e] _, topk_idx = torch.topk( tmp_scores, k=self.top_k, dim=-1, sorted=False ) diff --git a/tokenizer_config.json b/tokenizer_config.json index cee67cef5ad6c89a61c8a77c5633f4031c823764..c9fc70b8a94458cf038856226ab01cff79f2a13d 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -6554,10 +6554,10 @@ "eos_token": "<|end▁of▁sentence|>", "extra_special_tokens": {}, "legacy": true, - "model_max_length": 16384, + "model_max_length": 131072, "pad_token": "<|end▁of▁sentence|>", "sp_model_kwargs": {}, - "tokenizer_class": "LlamaTokenizer", + "tokenizer_class": "LlamaTokenizerFast", "unk_token": null, "use_default_system_prompt": false }