Update config.json
config.json CHANGED (+17 -19)
@@ -11,9 +11,9 @@
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
-  "intermediate_size":
-  "max_position_embeddings":
-  "max_window_layers":
+  "intermediate_size": 12288,
+  "max_position_embeddings": 262144,
+  "max_window_layers": 94,
   "mlp_only_layers": [],
   "model_type": "qwen3_moe",
   "moe_intermediate_size": 1536,
@@ -24,7 +24,17 @@
   "num_hidden_layers": 94,
   "num_key_value_heads": 4,
   "output_router_logits": false,
-  "
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
   "quantization_config": {
     "activation_scheme": "dynamic",
     "modules_to_not_convert": [
@@ -312,23 +322,11 @@
       "model.layers.93.mlp.gate",
       "model.layers.93.post_attention_layernorm"
     ],
+    "fmt": "e4m3",
     "quant_method": "fp8",
     "weight_block_size": [
       128,
       128
     ]
-  },
-
-  "rope_scaling": null,
-  "rope_theta": 5000000,
-  "router_aux_loss_coef": 0.0,
-  "shared_expert_intermediate_size": 0,
-  "sliding_window": null,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_cache": true,
-  "use_qk_norm": true,
-  "use_sliding_window": false,
-  "vocab_size": 151936
-  }
+  }
+}
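In sum, the commit fills in the truncated architecture fields, moves the remaining top-level keys (rope_scaling through vocab_size) ahead of quantization_config so the file closes cleanly, adds "fmt": "e4m3" to the FP8 quantization block, bumps router_aux_loss_coef from 0.0 to 0.001, and drops shared_expert_intermediate_size and use_qk_norm. Below is a minimal sketch for spot-checking the result, assuming the updated config.json has been downloaded locally; the stdlib-only approach and the file path are illustrative, not part of the commit.

```python
import json

# Minimal sanity check for the updated config.json after this commit.
# Assumes the file sits in the current directory; adjust the path as needed.
with open("config.json") as f:
    cfg = json.load(f)  # parsing also confirms the rebalanced closing braces

# Fields this commit sets or changes (values copied from the diff above):
assert cfg["max_position_embeddings"] == 262144
assert cfg["rope_theta"] == 5000000
assert cfg["router_aux_loss_coef"] == 0.001
assert cfg["transformers_version"] == "4.51.0"

# The FP8 quantization block gains an explicit "fmt" field:
qc = cfg["quantization_config"]
assert qc["quant_method"] == "fp8"
assert qc["fmt"] == "e4m3"
assert qc["weight_block_size"] == [128, 128]

# Keys the commit removes from the old layout:
assert "shared_expert_intermediate_size" not in cfg
assert "use_qk_norm" not in cfg
```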