dacorvo HF Staff commited on
Commit
14cbe32
·
verified ·
1 Parent(s): 100fee0

Synchronizing local compiler cache.

Browse files
Files changed (20) hide show
  1. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev3/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/fc72a21b6de27e9bcefe.json +79 -0
  2. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json +70 -0
  3. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json +70 -0
  4. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json +70 -0
  5. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json +74 -0
  6. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json +74 -0
  7. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json +74 -0
  8. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json +75 -0
  9. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json +70 -0
  10. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json +70 -0
  11. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json +70 -0
  12. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json +71 -0
  13. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json +71 -0
  14. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json +71 -0
  15. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json +76 -0
  16. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json +76 -0
  17. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json +76 -0
  18. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json +77 -0
  19. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json +77 -0
  20. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json +77 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev3/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/fc72a21b6de27e9bcefe.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 1,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 6144,
16
+ "max_position_embeddings": 262144,
17
+ "max_window_layers": 48,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 768,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 8,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
29
+ "checkpoint_revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 8,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 8,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev3",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 8,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 32,
65
+ "num_experts": 128,
66
+ "num_experts_per_tok": 8,
67
+ "num_hidden_layers": 48,
68
+ "num_key_value_heads": 4,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000000,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": false,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "residual_multiplier": 1.0,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 10000.0,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 49152
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "bfloat16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "residual_multiplier": 1.0,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 10000.0,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 49152
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "residual_multiplier": 1.0,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 10000.0,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 49152
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 4,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 4,
61
+ "pretraining_tp": 1,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_scaling": {
64
+ "factor": 8.0,
65
+ "high_freq_factor": 4.0,
66
+ "low_freq_factor": 1.0,
67
+ "original_max_position_embeddings": 8192,
68
+ "rope_type": "llama3"
69
+ },
70
+ "rope_theta": 500000.0,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 128256
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 4,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 4,
61
+ "pretraining_tp": 1,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_scaling": {
64
+ "factor": 8.0,
65
+ "high_freq_factor": 4.0,
66
+ "low_freq_factor": 1.0,
67
+ "original_max_position_embeddings": 8192,
68
+ "rope_type": "llama3"
69
+ },
70
+ "rope_theta": 500000.0,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 128256
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "bfloat16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 4,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 4,
61
+ "pretraining_tp": 1,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_scaling": {
64
+ "factor": 8.0,
65
+ "high_freq_factor": 4.0,
66
+ "low_freq_factor": 1.0,
67
+ "original_max_position_embeddings": 8192,
68
+ "rope_type": "llama3"
69
+ },
70
+ "rope_theta": 500000.0,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 128256
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 4096,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 32,
59
+ "num_hidden_layers": 16,
60
+ "num_key_value_heads": 8,
61
+ "pretraining_tp": 1,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_scaling": {
64
+ "factor": 32.0,
65
+ "high_freq_factor": 4.0,
66
+ "low_freq_factor": 1.0,
67
+ "original_max_position_embeddings": 8192,
68
+ "rope_type": "llama3"
69
+ },
70
+ "rope_theta": 500000.0,
71
+ "tie_word_embeddings": true,
72
+ "unsloth_fixed": true,
73
+ "use_cache": true,
74
+ "vocab_size": 128256
75
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev4",
44
+ "output_logits": false,
45
+ "pp_degree": 1,
46
+ "qkv_kernel_enabled": false,
47
+ "sequence_length": 100,
48
+ "sequence_parallel_enabled": false,
49
+ "speculation_length": 0,
50
+ "start_rank_id": 0,
51
+ "target": null,
52
+ "torch_dtype": "bfloat16",
53
+ "tp_degree": 2,
54
+ "vocab_parallel": false
55
+ },
56
+ "num_attention_heads": 32,
57
+ "num_experts_per_tok": 2,
58
+ "num_hidden_layers": 2,
59
+ "num_key_value_heads": 8,
60
+ "num_local_experts": 8,
61
+ "output_router_logits": false,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_theta": 10000.0,
64
+ "router_aux_loss_coef": 0.001,
65
+ "router_jitter_noise": 0.0,
66
+ "sliding_window": 4096,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 32000
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev4",
44
+ "output_logits": false,
45
+ "pp_degree": 1,
46
+ "qkv_kernel_enabled": false,
47
+ "sequence_length": 100,
48
+ "sequence_parallel_enabled": false,
49
+ "speculation_length": 0,
50
+ "start_rank_id": 0,
51
+ "target": null,
52
+ "torch_dtype": "float16",
53
+ "tp_degree": 2,
54
+ "vocab_parallel": false
55
+ },
56
+ "num_attention_heads": 32,
57
+ "num_experts_per_tok": 2,
58
+ "num_hidden_layers": 2,
59
+ "num_key_value_heads": 8,
60
+ "num_local_experts": 8,
61
+ "output_router_logits": false,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_theta": 10000.0,
64
+ "router_aux_loss_coef": 0.001,
65
+ "router_jitter_noise": 0.0,
66
+ "sliding_window": 4096,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 32000
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev4",
44
+ "output_logits": false,
45
+ "pp_degree": 1,
46
+ "qkv_kernel_enabled": false,
47
+ "sequence_length": 100,
48
+ "sequence_parallel_enabled": false,
49
+ "speculation_length": 0,
50
+ "start_rank_id": 0,
51
+ "target": null,
52
+ "torch_dtype": "float16",
53
+ "tp_degree": 2,
54
+ "vocab_parallel": false
55
+ },
56
+ "num_attention_heads": 32,
57
+ "num_experts_per_tok": 2,
58
+ "num_hidden_layers": 2,
59
+ "num_key_value_heads": 8,
60
+ "num_local_experts": 8,
61
+ "output_router_logits": false,
62
+ "rms_norm_eps": 1e-05,
63
+ "rope_theta": 10000.0,
64
+ "router_aux_loss_coef": 0.001,
65
+ "router_jitter_noise": 0.0,
66
+ "sliding_window": 4096,
67
+ "tie_word_embeddings": false,
68
+ "use_cache": true,
69
+ "vocab_size": 32000
70
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "bfloat16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 2,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 1,
61
+ "original_max_position_embeddings": 16384,
62
+ "partial_rotary_factor": 1.0,
63
+ "resid_pdrop": 0.0,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": null,
66
+ "rope_theta": 250000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": false,
69
+ "use_cache": true,
70
+ "vocab_size": 100352
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 2,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 1,
61
+ "original_max_position_embeddings": 16384,
62
+ "partial_rotary_factor": 1.0,
63
+ "resid_pdrop": 0.0,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": null,
66
+ "rope_theta": 250000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": false,
69
+ "use_cache": true,
70
+ "vocab_size": 100352
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev4",
46
+ "output_logits": false,
47
+ "pp_degree": 1,
48
+ "qkv_kernel_enabled": false,
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 2,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 1,
61
+ "original_max_position_embeddings": 16384,
62
+ "partial_rotary_factor": 1.0,
63
+ "resid_pdrop": 0.0,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": null,
66
+ "rope_theta": 250000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": false,
69
+ "use_cache": true,
70
+ "vocab_size": 100352
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 2,
63
+ "rms_norm_eps": 1e-06,
64
+ "rope_scaling": {
65
+ "factor": 4.0,
66
+ "original_max_position_embeddings": 32768,
67
+ "rope_type": "yarn",
68
+ "type": "yarn"
69
+ },
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": null,
72
+ "tie_word_embeddings": false,
73
+ "use_cache": true,
74
+ "use_sliding_window": false,
75
+ "vocab_size": 152064
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 2,
63
+ "rms_norm_eps": 1e-06,
64
+ "rope_scaling": {
65
+ "factor": 4.0,
66
+ "original_max_position_embeddings": 32768,
67
+ "rope_type": "yarn",
68
+ "type": "yarn"
69
+ },
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": null,
72
+ "tie_word_embeddings": false,
73
+ "use_cache": true,
74
+ "use_sliding_window": false,
75
+ "vocab_size": 152064
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev4",
48
+ "output_logits": false,
49
+ "pp_degree": 1,
50
+ "qkv_kernel_enabled": false,
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "bfloat16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 2,
63
+ "rms_norm_eps": 1e-06,
64
+ "rope_scaling": {
65
+ "factor": 4.0,
66
+ "original_max_position_embeddings": 32768,
67
+ "rope_type": "yarn",
68
+ "type": "yarn"
69
+ },
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": null,
72
+ "tie_word_embeddings": false,
73
+ "use_cache": true,
74
+ "use_sliding_window": false,
75
+ "vocab_size": 152064
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 2,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 2,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev4",
49
+ "output_logits": false,
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "norm_topk_prob": true,
62
+ "num_attention_heads": 2,
63
+ "num_experts": 8,
64
+ "num_experts_per_tok": 2,
65
+ "num_hidden_layers": 2,
66
+ "num_key_value_heads": 1,
67
+ "output_router_logits": false,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": null,
70
+ "rope_theta": 1000000.0,
71
+ "router_aux_loss_coef": 0.001,
72
+ "sliding_window": null,
73
+ "tie_word_embeddings": true,
74
+ "use_cache": true,
75
+ "use_sliding_window": false,
76
+ "vocab_size": 151936
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev4",
49
+ "output_logits": false,
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "norm_topk_prob": true,
62
+ "num_attention_heads": 2,
63
+ "num_experts": 8,
64
+ "num_experts_per_tok": 2,
65
+ "num_hidden_layers": 2,
66
+ "num_key_value_heads": 1,
67
+ "output_router_logits": false,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": null,
70
+ "rope_theta": 1000000.0,
71
+ "router_aux_loss_coef": 0.001,
72
+ "sliding_window": null,
73
+ "tie_word_embeddings": true,
74
+ "use_cache": true,
75
+ "use_sliding_window": false,
76
+ "vocab_size": 151936
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev4",
49
+ "output_logits": false,
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "norm_topk_prob": true,
62
+ "num_attention_heads": 2,
63
+ "num_experts": 8,
64
+ "num_experts_per_tok": 2,
65
+ "num_hidden_layers": 2,
66
+ "num_key_value_heads": 1,
67
+ "output_router_logits": false,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": null,
70
+ "rope_theta": 1000000.0,
71
+ "router_aux_loss_coef": 0.001,
72
+ "sliding_window": null,
73
+ "tie_word_embeddings": true,
74
+ "use_cache": true,
75
+ "use_sliding_window": false,
76
+ "vocab_size": 151936
77
+ }