littlebird13 committed
Commit b939f17 · verified · 1 Parent(s): d29dc93

Update config.json

Files changed (1)
  1. config.json +17 -19
config.json CHANGED
@@ -11,9 +11,9 @@
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
- "intermediate_size": 10944,
- "max_position_embeddings": 1048576,
- "max_window_layers": 28,
+ "intermediate_size": 12288,
+ "max_position_embeddings": 262144,
+ "max_window_layers": 94,
  "mlp_only_layers": [],
  "model_type": "qwen3_moe",
  "moe_intermediate_size": 1536,
@@ -24,7 +24,17 @@
  "num_hidden_layers": 94,
  "num_key_value_heads": 4,
  "output_router_logits": false,
- "qkv_bias": false,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 5000000,
+ "router_aux_loss_coef": 0.001,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.0",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936,
  "quantization_config": {
  "activation_scheme": "dynamic",
  "modules_to_not_convert": [
@@ -312,23 +322,11 @@
  "model.layers.93.mlp.gate",
  "model.layers.93.post_attention_layernorm"
  ],
+ "fmt": "e4m3",
  "quant_method": "fp8",
  "weight_block_size": [
  128,
  128
  ]
- },
- "rms_norm_eps": 1e-06,
- "rope_scaling": null,
- "rope_theta": 5000000,
- "router_aux_loss_coef": 0.0,
- "shared_expert_intermediate_size": 0,
- "sliding_window": null,
- "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.51.3",
- "use_cache": true,
- "use_qk_norm": true,
- "use_sliding_window": false,
- "vocab_size": 151936
-}
+ }
+}
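
For a quick sanity check after pulling this commit, the minimal sketch below reads a local copy of the updated config.json (the path "config.json" is an assumption, e.g. a file downloaded from this repo) and verifies the fields this commit touches; it uses only the standard json module.

# Minimal sketch: verify the config values changed in this commit.
# Assumption: "config.json" is a local copy of the updated file.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["intermediate_size"])          # expected: 12288 (was 10944)
print(cfg["max_position_embeddings"])    # expected: 262144 (was 1048576)
print(cfg["max_window_layers"])          # expected: 94 (was 28)
print(cfg["router_aux_loss_coef"])       # expected: 0.001 (was 0.0)
print(cfg["transformers_version"])       # expected: "4.51.0" (was "4.51.3")

qc = cfg["quantization_config"]
print(qc["fmt"])                         # expected: "e4m3" (newly added)
print(qc["quant_method"])                # "fp8"
print(qc["weight_block_size"])           # [128, 128]

# Keys removed in this commit should no longer be present.
for removed in ("qkv_bias", "shared_expert_intermediate_size", "use_qk_norm"):
    assert removed not in cfg, f"{removed} should have been removed"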