optimum-internal-testing
/

neuron-testing-cache

Model card · Files and versions · Community

dacorvo (HF Staff) committed 15 days ago

Commit

6fb6d15

verified ·

1 Parent(s): 6ff0176

Synchronizing local compiler cache.

Browse files

This view is limited to 50 files because it contains too many changes. See the raw diff.

Files changed (50) hide show

.gitattributes +42 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json +59 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json +59 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json +63 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json +63 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json +64 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json +59 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json +59 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json +60 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json +60 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json +83 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json +65 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json +65 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json +66 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json +66 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log +116 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json +1 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json +1 -0

.gitattributes CHANGED Viewed

@@ -4516,3 +4516,45 @@ neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/model.neff f
 neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
+  "_task": "text-generation",
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 1.0,
+  "embedding_multiplier": 1.0,
+  "hidden_act": "silu",
+  "hidden_size": 32,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "logits_scaling": 1.0,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
+    "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "residual_multiplier": 1.0,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 49152
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
+  "_task": "text-generation",
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_multiplier": 1.0,
+  "embedding_multiplier": 1.0,
+  "hidden_act": "silu",
+  "hidden_size": 32,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "logits_scaling": 1.0,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
+    "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "residual_multiplier": 1.0,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 49152
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "llamafactory/tiny-random-Llama-3",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "head_dim": 4,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "llamafactory/tiny-random-Llama-3",
+    "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "llamafactory/tiny-random-Llama-3",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "head_dim": 4,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "llamafactory/tiny-random-Llama-3",
+    "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 24,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 24
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "dacorvo/Mixtral-tiny",
+  "_task": "text-generation",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3584,
+  "max_position_embeddings": 1024,
+  "model_type": "mixtral",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "dacorvo/Mixtral-tiny",
+    "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 8,
+  "num_local_experts": 8,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 32000
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "dacorvo/Mixtral-tiny",
+  "_task": "text-generation",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3584,
+  "max_position_embeddings": 1024,
+  "model_type": "mixtral",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "dacorvo/Mixtral-tiny",
+    "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 8,
+  "num_local_experts": 8,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 32000
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "yujiepan/phi-4-tiny-random",
+  "_task": "text-generation",
+  "architectures": [
+    "Phi3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {},
+  "embd_pdrop": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 32,
+  "max_position_embeddings": 16384,
+  "model_type": "phi3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "yujiepan/phi-4-tiny-random",
+    "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 1,
+  "original_max_position_embeddings": 16384,
+  "partial_rotary_factor": 1.0,
+  "resid_pdrop": 0.0,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 250000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 100352
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "yujiepan/phi-4-tiny-random",
+  "_task": "text-generation",
+  "architectures": [
+    "Phi3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {},
+  "embd_pdrop": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 32,
+  "max_position_embeddings": 16384,
+  "model_type": "phi3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "yujiepan/phi-4-tiny-random",
+    "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 1,
+  "original_max_position_embeddings": 16384,
+  "partial_rotary_factor": 1.0,
+  "resid_pdrop": 0.0,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 250000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 100352
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen2.5-0.5B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+    "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 8,
+  "initializer_range": 0.02,
+  "intermediate_size": 16,
+  "layer_types": [
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 1,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
+    "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "factor": 4.0,
+    "original_max_position_embeddings": 32768,
+    "rope_type": "yarn",
+    "type": "yarn"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 8,
+  "initializer_range": 0.02,
+  "intermediate_size": 16,
+  "layer_types": [
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 1,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
+    "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "factor": 4.0,
+    "original_max_position_embeddings": 32768,
+    "rope_type": "yarn",
+    "type": "yarn"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3MoeForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "decoder_sparse_step": 2,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 64,
+  "initializer_range": 0.02,
+  "intermediate_size": 128,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 1,
+  "mlp_only_layers": [],
+  "model_type": "qwen3_moe",
+  "moe_intermediate_size": 128,
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
+    "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "norm_topk_prob": true,
+  "num_attention_heads": 2,
+  "num_experts": 8,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 1,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3MoeForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "decoder_sparse_step": 2,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 64,
+  "initializer_range": 0.02,
+  "intermediate_size": 128,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 1,
+  "mlp_only_layers": [],
+  "model_type": "qwen3_moe",
+  "moe_intermediate_size": 128,
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
+    "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "logical_nc_config": 1,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.19.8089.0+8ab9f450",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.3.1.dev5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": null,
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "norm_topk_prob": true,
+  "num_attention_heads": 2,
+  "num_experts": 8,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 1,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd7dae1ac13e262e1a7f9472b6a31e792e9c78b1214495916d9e83666910cb83
+size 567258

neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f6d4d1dc08dd89019640186d5368b1a308f0bd285a59f3f2e2a97e2bfc50b89
+size 14961664

neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@

+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:beff4a7ab7f70afbb24a85c88ce24e5bc5cfae6de236e3f9686176defedd5222
+size 81016

neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc275f95550b0e1be60dc7ed4bbc81233eab5133b3b0af0f576bae41fda91887
+size 297984

neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62347f65c836de46507f6affd655cd1f15290142541de20acc56feac1fcef987
+size 308436

neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43d7647959b5592dd250b2a9f5c68330b6f6bca38bd34f9fb881d2958e03f881
+size 83591

neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75e76aff9b7791f1118cf7da468310c32943d4a8a828a53d19775178041d8deb
+size 707584

neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@

neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c66551e445c34b0e221ecb2b2b7303796c4e0eb8ca338b4c297bebe314fed43
+size 69881

neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:876faeaea6b0587c3ff8663d3114f70d0484e607ab88e735a275ad01c223cbc9
+size 236544

neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ebfa9813b63ed97dbe6925f3ba4e5a94ea93bd0ad008070dc32430f87d9bd72
+size 247874

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2cfa8e742c4893766c3b63b6971e73469c20ed6ed959007a32f73a5d6e66751
+size 81550

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log ADDED Viewed

	@@ -0,0 +1,116 @@

+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: usage: neuronx-cc [-h] [--compatible-mode | --no-compatible-mode] [--disable-expensive-checks | --no-disable-expensive-checks]
+                  [--fp16-bilinear-upsampling | --no-fp16-bilinear-upsampling] [--tensor-no-opt-pass [TENSOR_NO_OPT_PASS]] [--set-tensors-no-opt [SET_TENSORS_NO_OPT]]
+                  [--model-specific-opt [MODEL_SPECIFIC_OPT]] [--statebuffer-scratch-size-in-bytes [STATEBUFFER_SCRATCH_SIZE_IN_BYTES]] [--target-mm-vec-size [TARGET_MM_VEC_SIZE]]
+                  [--disable-global-redundant-load-elimination | --no-disable-global-redundant-load-elimination] [--avoid-loop-reduce | --no-avoid-loop-reduce]
+                  [--disable-partition-vectorization | --no-disable-partition-vectorization]
+                  [--disable-tiling-of-non-overlapping-mem-access | --no-disable-tiling-of-non-overlapping-mem-access] [--weight-coalescing-threshold [WEIGHT_COALESCING_THRESHOLD]]
+                  [--static-weights | --no-static-weights] [--tensor-layout-p-order [TENSOR_LAYOUT_P_ORDER]] [--tensor-layout-b-order [TENSOR_LAYOUT_B_ORDER]]
+                  [--tensor-layout-f-order [TENSOR_LAYOUT_F_ORDER]] [--fp32-cast [FP32_CAST]] [--enable-replication | --no-enable-replication]
+                  [--use-inferentia-hwm | --no-use-inferentia-hwm] [--hbm-scratchpad-page-size-in-bytes [HBM_SCRATCHPAD_PAGE_SIZE_IN_BYTES]]
+                  [--enable-tensorized-spiller | --no-enable-tensorized-spiller] [--disable-spill-free-kernels | --no-disable-spill-free-kernels]
+                  [--enable-smt-allocator | --no-enable-smt-allocator] [--nki-manual-allocation | --no-nki-manual-allocation] [--enable-softmax-kernel | --no-enable-softmax-kernel]
+                  [--softmax-division-delay | --no-softmax-division-delay] [--accumulate-on-alu-dtype | --no-accumulate-on-alu-dtype]
+                  [--enable-shard-axis-verifier | --no-enable-shard-axis-verifier] [--non-local-tripcount-threshold [NON_LOCAL_TRIPCOUNT_THRESHOLD]]
+                  [--force-non-local-tensors [FORCE_NON_LOCAL_TENSORS]] [--force-concat-to-non-local | --no-force-concat-to-non-local]
+                  [--force-all-matmult-input-non-local | --no-force-all-matmult-input-non-local] [--large-1d-tensor-threshold [LARGE_1D_TENSOR_THRESHOLD]] [--dump-after [DUMP_AFTER]]
+                  [--dump-path [DUMP_PATH]] [--dump-files | --no-dump-files] [--save-weights | --no-save-weights] [--dump-nki | --no-dump-nki]
+                  [--auto-reduce-crash | --no-auto-reduce-crash] [--debug-mode | --no-debug-mode] [--profile-pass [PROFILE_PASS]] [--rollback-pass [ROLLBACK_PASS]]
+                  [--skip-pass [SKIP_PASS]] [--debug-pass [DEBUG_PASS]] [--max-prefetch-size-in-bytes [MAX_PREFETCH_SIZE_IN_BYTES]]
+                  [--max-indirect-dma-prefetch-size-in-bytes [MAX_INDIRECT_DMA_PREFETCH_SIZE_IN_BYTES]] [--max-statebuffer-tile-size-in-bytes [MAX_STATEBUFFER_TILE_SIZE_IN_BYTES]]
+                  [--max-computation-tile-size [MAX_COMPUTATION_TILE_SIZE]] [--max-local-tensor-tile-size-in-bytes [MAX_LOCAL_TENSOR_TILE_SIZE_IN_BYTES]]
+                  [--max-prefetch-buffer-size-in-bytes [MAX_PREFETCH_BUFFER_SIZE_IN_BYTES]] [--enable-trivial-dmacopy-transpose | --no-enable-trivial-dmacopy-transpose]
+                  [--enable-dmacopy-transpose | --no-enable-dmacopy-transpose] [--target-arithmetic-intensity [TARGET_ARITHMETIC_INTENSITY]]
+                  [--disable-experimental-addr-calc | --no-disable-experimental-addr-calc] [--pool-buffer-size [POOL_BUFFER_SIZE]] [--disable-new-scatter | --no-disable-new-scatter]
+                  [--enable-stream-transpose | --no-enable-stream-transpose] [--enable-transpose-reduce | --no-enable-transpose-reduce]
+                  [--enable-transpose-batchnormstats2 | --no-enable-transpose-batchnormstats2] [--force-transpose-batchnormstats2 | --no-force-transpose-batchnormstats2]
+                  [--mm-transpose-type [MM_TRANSPOSE_TYPE]] [--enable-fp32-mm-transpose | --no-enable-fp32-mm-transpose] [--disable-dma-cast | --no-disable-dma-cast]
+                  [--enable-8bit-tensorcopy-cast | --no-enable-8bit-tensorcopy-cast] [--min-allreduce-tile-size-in-byte [MIN_ALLREDUCE_TILE_SIZE_IN_BYTE]]
+                  [--min-allgather-tile-size-in-byte [MIN_ALLGATHER_TILE_SIZE_IN_BYTE]] [--max-inflight-allreduce [MAX_INFLIGHT_ALLREDUCE]]
+                  [--max-dma-access-free-depth [MAX_DMA_ACCESS_FREE_DEPTH]] [--dve-bn-stats-paritition-max-elements [DVE_BN_STATS_PARITITION_MAX_ELEMENTS]]
+                  [--max-batch-norm-reduction-size [MAX_BATCH_NORM_REDUCTION_SIZE]] [--spmd | --no-spmd] [--prioritize-minimize-transpose | --no-prioritize-minimize-transpose]
+                  [--enable-ccop-compute-overlap | --no-enable-ccop-compute-overlap] [--enable-fine-grained-ccop-compute-overlap | --no-enable-fine-grained-ccop-compute-overlap]
+                  [--fine-grained-ccop-compute-channels-per-ccop [FINE_GRAINED_CCOP_COMPUTE_CHANNELS_PER_CCOP]]
+                  [--enable-dse-after-mask-propagation | --no-enable-dse-after-mask-propagation] [--enable-dge-on-io-dma | --no-enable-dge-on-io-dma]
+                  [--enable-dge-on-spill-reload-dma | --no-enable-dge-on-spill-reload-dma] [--enable-dge-on-indirect-dma | --no-enable-dge-on-indirect-dma]
+                  [--enable-dge-on-vector-indirect-dma | --no-enable-dge-on-vector-indirect-dma] [--enable-dge-on-dst-reduce | --no-enable-dge-on-dst-reduce]
+                  [--enable-scalar-dge-vectorization | --no-enable-scalar-dge-vectorization] [--enable-dram-to-dram-transpose | --no-enable-dram-to-dram-transpose]
+                  [--run-pg-layout-and-tiling | --no-run-pg-layout-and-tiling] [--disable-delinearize-io-tensors | --no-disable-delinearize-io-tensors]
+                  [--delinearize-tensor-maximum-rank [DELINEARIZE_TENSOR_MAXIMUM_RANK]] [--delinearize-min-dim-size [DELINEARIZE_MIN_DIM_SIZE]]
+                  [--delinearize-maximum-loop-depth [DELINEARIZE_MAXIMUM_LOOP_DEPTH]] [--big-tensor-threshold-one-d-memcpy [BIG_TENSOR_THRESHOLD_ONE_D_MEMCPY]]
+                  [--disable-degraded-fusion | --no-disable-degraded-fusion] [--disable-tensor-op-io-reshape | --no-disable-tensor-op-io-reshape]
+                  [--disable-non-compatible-tensor-op-io-reshape | --no-disable-non-compatible-tensor-op-io-reshape] [--dont-delinearize-tensor | --no-dont-delinearize-tensor]
+                  [--disable-single-row-matmult | --no-disable-single-row-matmult] [--disable-single-column-matmult | --no-disable-single-column-matmult]
+                  [--enable-penguin-mac-count | --no-enable-penguin-mac-count] [--min-tc-threshold [MIN_TC_THRESHOLD]]
+                  [--disable-dropout-pattern-match | --no-disable-dropout-pattern-match] [--set-dropout-rate-as-keep | --no-set-dropout-rate-as-keep]
+                  [--enable-advanced-delinearization | --no-enable-advanced-delinearization] [--keep-rng-tensor-op | --no-keep-rng-tensor-op]
+                  [--big-tensor-threshold-one-d [BIG_TENSOR_THRESHOLD_ONE_D]] [--bir-json-version [BIR_JSON_VERSION]] [--dump-ccop-axes-group-graph | --no-dump-ccop-axes-group-graph]
+                  [--cnn-training-model | --no-cnn-training-model] [--enable-all-reduce-axes-as-par | --no-enable-all-reduce-axes-as-par]
+                  [--enable-pag-based-layout-analysis | --no-enable-pag-based-layout-analysis] [--enable-tiling-visualization | --no-enable-tiling-visualization]
+                  [--enable-edge-dump | --no-enable-edge-dump] [--override-pg-tile-size [OVERRIDE_PG_TILE_SIZE]] [--enable-p-to-pp-broadcast | --no-enable-p-to-pp-broadcast]
+                  [--partial-loop-fusion-max-iter [PARTIAL_LOOP_FUSION_MAX_ITER]] [--cast-to-round | --no-cast-to-round] [--keep-remat-dma-transpose | --no-keep-remat-dma-transpose]
+                  [--disable-lower-transpose-to-shuffle | --no-disable-lower-transpose-to-shuffle] [--disable-bitcasted-transpose | --no-disable-bitcasted-transpose]
+                  [--enable-bitcasted-transpose-all | --no-enable-bitcasted-transpose-all] [--enable-saturation-convert | --no-enable-saturation-convert]
+                  [--max-tiling-permutation [MAX_TILING_PERMUTATION]] [--loop-order-heuristic [LOOP_ORDER_HEURISTIC]] [--disable-max-stride-tiling | --no-disable-max-stride-tiling]
+                  [--flatten-single-column-dma | --no-flatten-single-column-dma] [--keep-builtins [KEEP_BUILTINS]] [--experimental-gpsimd-library [EXPERIMENTAL_GPSIMD_LIBRARY]]
+                  [--internal_dynamic_dma_scratch_size_per_partition [INTERNAL_DYNAMIC_DMA_SCRATCH_SIZE_PER_PARTITION]]
+                  [--internal-allow-rmsnorm-cascaded-reduce | --no-internal-allow-rmsnorm-cascaded-reduce] [--softmax-epsilon [SOFTMAX_EPSILON]]
+                  [--max-dma-duplication [MAX_DMA_DUPLICATION]] [--max-weight-rewrite-permutation [MAX_WEIGHT_REWRITE_PERMUTATION]]
+                  [--log-tiling-bottleneck-info | --no-log-tiling-bottleneck-info] [--inst-count-limit [INST_COUNT_LIMIT]] [--macro-instance-limit [MACRO_INSTANCE_LIMIT]]
+                  [--always-transpose | --no-always-transpose] [--enable-prefetch-block-tensors | --no-enable-prefetch-block-tensors]
+                  [--max-dma-legalization-permutation [MAX_DMA_LEGALIZATION_PERMUTATION]] [--disable-vectorize-dge-dma | --vectorize-dge-dma]
+                  [--eager-tkg-vectorize-dma | --no-eager-tkg-vectorize-dma] [--no-fine-grained-cc-spill | --no-no-fine-grained-cc-spill]
+                  [--layout-complexity-warning-threshold [LAYOUT_COMPLEXITY_WARNING_THRESHOLD]] [--partition const dim candidate threshold [PARTITION CONST DIM CANDIDATE THRESHOLD]]
+                  [--run-layout-viewer | --no-run-layout-viewer] [--non-local-num-loadstores-threshold [NON_LOCAL_NUM_LOADSTORES_THRESHOLD]]
+                  [--disable-degraded-flatten-axes | --no-disable-degraded-flatten-axes] [--use-accurate-reduce-cost-model | --no-use-accurate-reduce-cost-model]
+                  [--visualize-detailed-pag-graph | --no-visualize-detailed-pag-graph] [--visualize-simplified-pag-graph | --no-visualize-simplified-pag-graph]
+                  [--visualize-undecided-cc-graph | --no-visualize-undecided-cc-graph] [--disable-prefer-par-on-non-broadcast | --no-disable-prefer-par-on-non-broadcast]
+                  [--cycle-based-layout-solution-size-threshold [CYCLE_BASED_LAYOUT_SOLUTION_SIZE_THRESHOLD]]
+                  [--split-ucc-tensor-size-threshold-in-bytes [SPLIT_UCC_TENSOR_SIZE_THRESHOLD_IN_BYTES]] [--minimum-legal-par-tripcount [MINIMUM_LEGAL_PAR_TRIPCOUNT]]
+                  [--operator-fution-split-ratio [OPERATOR_FUTION_SPLIT_RATIO]] [--keep-tensor-names | --no-keep-tensor-names] [--show-scalar-values | --no-show-scalar-values]
+                  [--one-tensor-per-line | --no-one-tensor-per-line] [--no-ssa-style | --no-no-ssa-style] [--no-collapse-like-dims | --no-no-collapse-like-dims]
+                  [--keep-offloaded-mem-intrinsics | --no-keep-offloaded-mem-intrinsics] [--no-color-terminal | --no-no-color-terminal]
+                  [--dump-sharding-decision-graph | --no-dump-sharding-decision-graph] [--shard-axes [SHARD_AXES]]
+                  [--experimental-sharding-propagation | --no-experimental-sharding-propagation] [--mem-bound-ratio-for-mm-sharding [MEM_BOUND_RATIO_FOR_MM_SHARDING]]
+                  [--enable-lower-shard-axis-before-fusion | --no-enable-lower-shard-axis-before-fusion] [--enable-nki-attention-kernel | --no-enable-nki-attention-kernel]
+                  [--enable-software-pipelining | --no-enable-software-pipelining] [--internal-lnc-pad-sendrecv | --no-internal-lnc-pad-sendrecv]
+                  [--enable-send-recv-cce | --no-enable-send-recv-cce] [--use-ilp-layout-search | --no-use-ilp-layout-search]
+                  [--set-nki-shard-on-producer-consumer | --no-set-nki-shard-on-producer-consumer]
+                  [--insert-offloaded-transpose-dma-free-threshold [INSERT_OFFLOADED_TRANSPOSE_DMA_FREE_THRESHOLD]] [--enable-cast-in-select | --no-enable-cast-in-select]
+                  [--delinear-contract-dim | --no-delinear-contract-dim] [--vectorize-partitions | --no-vectorize-partitions]
+                  [--internal-disable-double-row-gen3 | --no-internal-disable-double-row-gen3] [--internal-autotune | --no-internal-autotune]
+                  [--internal-autotune-config [INTERNAL_AUTOTUNE_CONFIG]] [--internal-autotune-subprocess [INTERNAL_AUTOTUNE_SUBPROCESS]]
+                  [--internal-autotune-extraction-process [INTERNAL_AUTOTUNE_EXTRACTION_PROCESS]] [--tf-dma-size-in-bytes [TF_DMA_SIZE_IN_BYTES]]
+                  [--tf-low-memory-pressure-threshold [TF_LOW_MEMORY_PRESSURE_THRESHOLD]] [--enable-isl-in-injective-check | --no-enable-isl-in-injective-check]
+                  [--enable-symbolic-memory-pressure-estimation-tf | --no-enable-symbolic-memory-pressure-estimation-tf]
+                  [--allow-ccrank-axis-tritium-fusion | --no-allow-ccrank-axis-tritium-fusion]
+                  [--internal-autotune-tritium-use-more-tripcounts | --no-internal-autotune-tritium-use-more-tripcounts]
+                  [--internal-autotune-tritium-only-with-id [INTERNAL_AUTOTUNE_TRITIUM_ONLY_WITH_ID]] [--vectorize-strided-dma | --no-vectorize-strided-dma]
+                  [--profile-smt | --no-profile-smt] [--number-of-devices [NUMBER_OF_DEVICES]] [--cc-pipeline-tiling-factor [CC_PIPELINE_TILING_FACTOR]]
+                  [--no-cc-pipeline-tiling-for-fsdp | --no-no-cc-pipeline-tiling-for-fsdp] [--cc-pipeline-tiling-for-fsdp-only | --no-cc-pipeline-tiling-for-fsdp-only]
+                  [--experimental-convolution-kernel-match | --no-experimental-convolution-kernel-match] [--disable-inline-cast | --no-disable-inline-cast]
+                  [--disable-affine-select | --no-disable-affine-select] [--profile-memory-pressure | --no-profile-memory-pressure]
+                  [--report-n-lowest-utilization [REPORT_N_LOWEST_UTILIZATION]] [--vectorize-direct-dma | --no-vectorize-direct-dma]
+                  [--log-top-n-latency-dmas [LOG_TOP_N_LATENCY_DMAS]] [--low-psum-usage-threshold [LOW_PSUM_USAGE_THRESHOLD]]
+                  [--warn-parallelism-threshold [WARN_PARALLELISM_THRESHOLD]] [--disable-square-matmul | --no-disable-square-matmul]
+                  [--disable-vector-transpose | --no-disable-vector-transpose] [--disable-software-replication | --no-disable-software-replication]
+                  [--internal-disable-fma-on-ios | --no-internal-disable-fma-on-ios] [--nki-dl | --no-nki-dl] [--disable-tiling-allreduce | --no-disable-tiling-allreduce]
+                  [--annotate-no-spill-hint | --no-annotate-no-spill-hint] [--print-nki | --no-print-nki] [--nki-debug-mode | --no-nki-debug-mode]
+                  [--ccop-bucketing | --no-ccop-bucketing] [--fp32-cast-input-tensors | --no-fp32-cast-input-tensors] [--enable-tritium-loopfusion | --no-enable-tritium-loopfusion]
+                  [--enable-ternary-fission | --no-enable-ternary-fission] [--disable-insert-implicit-shard-axis | --no-disable-insert-implicit-shard-axis]
+                  [--enable-hoist-wlo-all-gather | --no-enable-hoist-wlo-all-gather] [--enable-hoist-fsdp-collectives | --no-enable-hoist-fsdp-collectives]
+                  [--disable-concat-delinearizer | --no-disable-concat-delinearizer] [--enable-aliasing-dependency-verifier | --no-enable-aliasing-dependency-verifier]
+                  [--enable-must-alias-to-iobuffer | --no-enable-must-alias-to-iobuffer] [--disable-partition-locality-tiling | --no-disable-partition-locality-tiling]
+                  [--enable-memory-pressure-driven-loop-fusion | --no-enable-memory-pressure-driven-loop-fusion] [--legalize-tensor-tensor-op | --no-legalize-tensor-tensor-op]
+                  [--layout-transform-heuristic [LAYOUT_TRANSFORM_HEURISTIC]] [--disable-bir-codegen-loadstore | --no-disable-bir-codegen-loadstore]
+                  [--dump-tensorizer-bir-json | --no-dump-tensorizer-bir-json] [--disable-rank-id-rewriting | --no-disable-rank-id-rewriting]
+                  [--vectorization-size [VECTORIZATION_SIZE]] [--atol [ATOL]] [--rtol [RTOL]] [--save-locals | --no-save-locals]
+                  [--no-simplify-before-simulation | --no-no-simplify-before-simulation] [--correct-precision-mode | --no-correct-precision-mode]
+                  [--dont-verify-after-all | --no-dont-verify-after-all] [--disable-debug-info-dump | --no-disable-debug-info-dump] [--run-pass-list [RUN_PASS_LIST]]
+                  [--dump-pass-list [DUMP_PASS_LIST]] [--dump-pass-list-and-exit | --no-dump-pass-list-and-exit] [--print-stats | --no-print-stats]
+                  [--run-simulator-after [RUN_SIMULATOR_AFTER]] [--enable-peephole-inst-combine | --no-enable-peephole-inst-combine]
+                  [--enable-repartitioning | --no-enable-repartitioning] [--no-ccop-barrier | --no-no-ccop-barrier]
+                  [--enable-iobuffer-to-must-alias | --no-enable-iobuffer-to-must-alias] [--custom-script [CUSTOM_SCRIPT]] [--enable-bir-converter [ENABLE_BIR_CONVERTER]]
+                  [--custom-compute [CUSTOM_COMPUTE]] [--enable-bircodegen-unroll [ENABLE_BIRCODEGEN_UNROLL]] [--fuse-param-to-neff | --no-fuse-param-to-neff]
+                  [--only-compile-subgraph [ONLY_COMPILE_SUBGRAPH]] [--model-type-transformer | --no-model-type-transformer] [--model-type-cnn-training | --no-model-type-cnn-training]
+                  [--distribution-type-llm-training | --no-distribution-type-llm-training] [--num-neuroncores-per-sengine [NUM_NEURONCORES_PER_SENGINE]]
+neuronx-cc: error: argument --cc-pipeline-tiling-factor: invalid int value: '2--vectorize-strided-dma'

neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73521137a0627d1cbcf3276af2044ea2e025b43384d5ba149c1ee9f28e06ae23
+size 88353

neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b66d4b1a0323238c3a55ab0dc7f54b938f398ac7b79ed19aed77d248df0ddc12
+size 308224

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e68080a74eeee69a4cca47ffb58ac94d4475079ee317556d3c4985d658030a7
+size 52641

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1faffbfaeb70a15963aaa15126906cc088054168366d15b32f56ff2235d63f96
+size 185344

neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f75310b79ede900f855f7d33db734190e3d8b1f8c38de1d620c7660449b8abde
+size 195539

neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done ADDED Viewed

File without changes

neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91462061b8dd8d78b36005464d30793234a3b8d0d65025605c7a747756879de4
+size 88814

neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fccf83e1f68a07690baf4dba83c9edb5a4a0dcb4f9b868628e780b46605ac229
+size 236544

neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@

+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]