diff --git a/.gitattributes b/.gitattributes index 493662f9660b86bd942b9247d5f40d23b3cabc21..504bbef4dd8e8a9cf1d3b912fc761b82c96992bf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4516,3 +4516,45 @@ neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/model.neff f neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json new file mode 100644 index 0000000000000000000000000000000000000000..dbafcc03048a833e143b779df690888d97829a7c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json new file mode 100644 index 0000000000000000000000000000000000000000..1980ca61437357627b75dea01d99dc35babe62e8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json new file mode 100644 index 0000000000000000000000000000000000000000..e572c04e5a95ab0bbd0f54e6443cbce69b7ee62c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json new file mode 100644 index 0000000000000000000000000000000000000000..b4f9c859c2b506bd7ba73496a1fe9fa37d2e69c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json new file mode 100644 index 0000000000000000000000000000000000000000..77fe8af0483de1817d078cf7b80ce5fd70b271a8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json new file mode 100644 index 0000000000000000000000000000000000000000..8517d13d2422c5aad6b8aed073178cd6c423b361 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json new file mode 100644 index 0000000000000000000000000000000000000000..14482d4628cdb44db3c81944fb20ac91309c5ed4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json new file mode 100644 index 0000000000000000000000000000000000000000..cd532485caeb751c5b621708323662038216c604 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json new file mode 100644 index 0000000000000000000000000000000000000000..d032e0ed068694ff383f074bbb31e3f10590d43d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json new file mode 100644 index 0000000000000000000000000000000000000000..63dce2355ce77fd91dcb802ef86b54852d1b9ecb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json new file mode 100644 index 0000000000000000000000000000000000000000..ae9da4d5a952d707a1b2494323293162086a5ac1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json new file mode 100644 index 0000000000000000000000000000000000000000..813de212ad71ec43d9f08e50877d26cea0fc100a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json new file mode 100644 index 0000000000000000000000000000000000000000..48f54fdeaff316647d40a8cc9e63ed9e515af356 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json new file mode 100644 index 0000000000000000000000000000000000000000..0aac5df9b0d624ffb646a23d3208023c41daa49b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..53a9d64f53d0249d49fc89061208951aeb04d676 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7dae1ac13e262e1a7f9472b6a31e792e9c78b1214495916d9e83666910cb83 +size 567258 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7ac84ec41a091402fff380bd0e60fa9d8922ed8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6d4d1dc08dd89019640186d5368b1a308f0bd285a59f3f2e2a97e2bfc50b89 +size 14961664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..51723992d01807c9bc97d9b00d82c9c8ef974b94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beff4a7ab7f70afbb24a85c88ce24e5bc5cfae6de236e3f9686176defedd5222 +size 81016 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b9b9fed467f794f14cc7c8b744794e221262f21 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc275f95550b0e1be60dc7ed4bbc81233eab5133b3b0af0f576bae41fda91887 +size 297984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b0b587cdc952b98ba9c0d4e77d4fc6442fe857e0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62347f65c836de46507f6affd655cd1f15290142541de20acc56feac1fcef987 +size 308436 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e03ec74b4e69de91c04658dca659380a83905b03 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d7647959b5592dd250b2a9f5c68330b6f6bca38bd34f9fb881d2958e03f881 +size 83591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b35d7b683f8a0782b0ab61cd27129d580f5af38 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e76aff9b7791f1118cf7da468310c32943d4a8a828a53d19775178041d8deb +size 707584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b24bdafabe78078d997f7762fc81e9ee28330bf0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c66551e445c34b0e221ecb2b2b7303796c4e0eb8ca338b4c297bebe314fed43 +size 69881 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d405efcca707df47ebc2e3b4766cb7d9193f2fd8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876faeaea6b0587c3ff8663d3114f70d0484e607ab88e735a275ad01c223cbc9 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d53df88607849568ff47fb5188b7a42dd02b03db --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ebfa9813b63ed97dbe6925f3ba4e5a94ea93bd0ad008070dc32430f87d9bd72 +size 247874 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..db398d5c9a94bb4ca339a847669d6a4ab5af55ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b09a1b5945c04edd50d854098e0642f2885f6f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cfa8e742c4893766c3b63b6971e73469c20ed6ed959007a32f73a5d6e66751 +size 81550 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log new file mode 100644 index 0000000000000000000000000000000000000000..45f9851f4cf89dc04c7fa261dc4ed280a0fa5c86 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log @@ -0,0 +1,116 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: usage: neuronx-cc [-h] [--compatible-mode | --no-compatible-mode] [--disable-expensive-checks | --no-disable-expensive-checks] + [--fp16-bilinear-upsampling | --no-fp16-bilinear-upsampling] [--tensor-no-opt-pass [TENSOR_NO_OPT_PASS]] [--set-tensors-no-opt [SET_TENSORS_NO_OPT]] + [--model-specific-opt [MODEL_SPECIFIC_OPT]] [--statebuffer-scratch-size-in-bytes [STATEBUFFER_SCRATCH_SIZE_IN_BYTES]] [--target-mm-vec-size [TARGET_MM_VEC_SIZE]] + [--disable-global-redundant-load-elimination | --no-disable-global-redundant-load-elimination] [--avoid-loop-reduce | --no-avoid-loop-reduce] + [--disable-partition-vectorization | --no-disable-partition-vectorization] + [--disable-tiling-of-non-overlapping-mem-access | --no-disable-tiling-of-non-overlapping-mem-access] [--weight-coalescing-threshold [WEIGHT_COALESCING_THRESHOLD]] + [--static-weights | --no-static-weights] [--tensor-layout-p-order [TENSOR_LAYOUT_P_ORDER]] [--tensor-layout-b-order [TENSOR_LAYOUT_B_ORDER]] + [--tensor-layout-f-order [TENSOR_LAYOUT_F_ORDER]] [--fp32-cast [FP32_CAST]] [--enable-replication | --no-enable-replication] + [--use-inferentia-hwm | --no-use-inferentia-hwm] [--hbm-scratchpad-page-size-in-bytes [HBM_SCRATCHPAD_PAGE_SIZE_IN_BYTES]] + [--enable-tensorized-spiller | --no-enable-tensorized-spiller] [--disable-spill-free-kernels | --no-disable-spill-free-kernels] + [--enable-smt-allocator | --no-enable-smt-allocator] [--nki-manual-allocation | --no-nki-manual-allocation] [--enable-softmax-kernel | --no-enable-softmax-kernel] + [--softmax-division-delay | --no-softmax-division-delay] [--accumulate-on-alu-dtype | --no-accumulate-on-alu-dtype] + [--enable-shard-axis-verifier | --no-enable-shard-axis-verifier] [--non-local-tripcount-threshold [NON_LOCAL_TRIPCOUNT_THRESHOLD]] + [--force-non-local-tensors [FORCE_NON_LOCAL_TENSORS]] [--force-concat-to-non-local | --no-force-concat-to-non-local] + [--force-all-matmult-input-non-local | --no-force-all-matmult-input-non-local] [--large-1d-tensor-threshold [LARGE_1D_TENSOR_THRESHOLD]] [--dump-after [DUMP_AFTER]] + [--dump-path [DUMP_PATH]] [--dump-files | --no-dump-files] [--save-weights | --no-save-weights] [--dump-nki | --no-dump-nki] + [--auto-reduce-crash | --no-auto-reduce-crash] [--debug-mode | --no-debug-mode] [--profile-pass [PROFILE_PASS]] [--rollback-pass [ROLLBACK_PASS]] + [--skip-pass [SKIP_PASS]] [--debug-pass [DEBUG_PASS]] [--max-prefetch-size-in-bytes [MAX_PREFETCH_SIZE_IN_BYTES]] + [--max-indirect-dma-prefetch-size-in-bytes [MAX_INDIRECT_DMA_PREFETCH_SIZE_IN_BYTES]] [--max-statebuffer-tile-size-in-bytes [MAX_STATEBUFFER_TILE_SIZE_IN_BYTES]] + [--max-computation-tile-size [MAX_COMPUTATION_TILE_SIZE]] [--max-local-tensor-tile-size-in-bytes [MAX_LOCAL_TENSOR_TILE_SIZE_IN_BYTES]] + [--max-prefetch-buffer-size-in-bytes [MAX_PREFETCH_BUFFER_SIZE_IN_BYTES]] [--enable-trivial-dmacopy-transpose | --no-enable-trivial-dmacopy-transpose] + [--enable-dmacopy-transpose | --no-enable-dmacopy-transpose] [--target-arithmetic-intensity [TARGET_ARITHMETIC_INTENSITY]] + [--disable-experimental-addr-calc | --no-disable-experimental-addr-calc] [--pool-buffer-size [POOL_BUFFER_SIZE]] [--disable-new-scatter | --no-disable-new-scatter] + [--enable-stream-transpose | --no-enable-stream-transpose] [--enable-transpose-reduce | --no-enable-transpose-reduce] + [--enable-transpose-batchnormstats2 | --no-enable-transpose-batchnormstats2] [--force-transpose-batchnormstats2 | --no-force-transpose-batchnormstats2] + [--mm-transpose-type [MM_TRANSPOSE_TYPE]] [--enable-fp32-mm-transpose | --no-enable-fp32-mm-transpose] [--disable-dma-cast | --no-disable-dma-cast] + [--enable-8bit-tensorcopy-cast | --no-enable-8bit-tensorcopy-cast] [--min-allreduce-tile-size-in-byte [MIN_ALLREDUCE_TILE_SIZE_IN_BYTE]] + [--min-allgather-tile-size-in-byte [MIN_ALLGATHER_TILE_SIZE_IN_BYTE]] [--max-inflight-allreduce [MAX_INFLIGHT_ALLREDUCE]] + [--max-dma-access-free-depth [MAX_DMA_ACCESS_FREE_DEPTH]] [--dve-bn-stats-paritition-max-elements [DVE_BN_STATS_PARITITION_MAX_ELEMENTS]] + [--max-batch-norm-reduction-size [MAX_BATCH_NORM_REDUCTION_SIZE]] [--spmd | --no-spmd] [--prioritize-minimize-transpose | --no-prioritize-minimize-transpose] + [--enable-ccop-compute-overlap | --no-enable-ccop-compute-overlap] [--enable-fine-grained-ccop-compute-overlap | --no-enable-fine-grained-ccop-compute-overlap] + [--fine-grained-ccop-compute-channels-per-ccop [FINE_GRAINED_CCOP_COMPUTE_CHANNELS_PER_CCOP]] + [--enable-dse-after-mask-propagation | --no-enable-dse-after-mask-propagation] [--enable-dge-on-io-dma | --no-enable-dge-on-io-dma] + [--enable-dge-on-spill-reload-dma | --no-enable-dge-on-spill-reload-dma] [--enable-dge-on-indirect-dma | --no-enable-dge-on-indirect-dma] + [--enable-dge-on-vector-indirect-dma | --no-enable-dge-on-vector-indirect-dma] [--enable-dge-on-dst-reduce | --no-enable-dge-on-dst-reduce] + [--enable-scalar-dge-vectorization | --no-enable-scalar-dge-vectorization] [--enable-dram-to-dram-transpose | --no-enable-dram-to-dram-transpose] + [--run-pg-layout-and-tiling | --no-run-pg-layout-and-tiling] [--disable-delinearize-io-tensors | --no-disable-delinearize-io-tensors] + [--delinearize-tensor-maximum-rank [DELINEARIZE_TENSOR_MAXIMUM_RANK]] [--delinearize-min-dim-size [DELINEARIZE_MIN_DIM_SIZE]] + [--delinearize-maximum-loop-depth [DELINEARIZE_MAXIMUM_LOOP_DEPTH]] [--big-tensor-threshold-one-d-memcpy [BIG_TENSOR_THRESHOLD_ONE_D_MEMCPY]] + [--disable-degraded-fusion | --no-disable-degraded-fusion] [--disable-tensor-op-io-reshape | --no-disable-tensor-op-io-reshape] + [--disable-non-compatible-tensor-op-io-reshape | --no-disable-non-compatible-tensor-op-io-reshape] [--dont-delinearize-tensor | --no-dont-delinearize-tensor] + [--disable-single-row-matmult | --no-disable-single-row-matmult] [--disable-single-column-matmult | --no-disable-single-column-matmult] + [--enable-penguin-mac-count | --no-enable-penguin-mac-count] [--min-tc-threshold [MIN_TC_THRESHOLD]] + [--disable-dropout-pattern-match | --no-disable-dropout-pattern-match] [--set-dropout-rate-as-keep | --no-set-dropout-rate-as-keep] + [--enable-advanced-delinearization | --no-enable-advanced-delinearization] [--keep-rng-tensor-op | --no-keep-rng-tensor-op] + [--big-tensor-threshold-one-d [BIG_TENSOR_THRESHOLD_ONE_D]] [--bir-json-version [BIR_JSON_VERSION]] [--dump-ccop-axes-group-graph | --no-dump-ccop-axes-group-graph] + [--cnn-training-model | --no-cnn-training-model] [--enable-all-reduce-axes-as-par | --no-enable-all-reduce-axes-as-par] + [--enable-pag-based-layout-analysis | --no-enable-pag-based-layout-analysis] [--enable-tiling-visualization | --no-enable-tiling-visualization] + [--enable-edge-dump | --no-enable-edge-dump] [--override-pg-tile-size [OVERRIDE_PG_TILE_SIZE]] [--enable-p-to-pp-broadcast | --no-enable-p-to-pp-broadcast] + [--partial-loop-fusion-max-iter [PARTIAL_LOOP_FUSION_MAX_ITER]] [--cast-to-round | --no-cast-to-round] [--keep-remat-dma-transpose | --no-keep-remat-dma-transpose] + [--disable-lower-transpose-to-shuffle | --no-disable-lower-transpose-to-shuffle] [--disable-bitcasted-transpose | --no-disable-bitcasted-transpose] + [--enable-bitcasted-transpose-all | --no-enable-bitcasted-transpose-all] [--enable-saturation-convert | --no-enable-saturation-convert] + [--max-tiling-permutation [MAX_TILING_PERMUTATION]] [--loop-order-heuristic [LOOP_ORDER_HEURISTIC]] [--disable-max-stride-tiling | --no-disable-max-stride-tiling] + [--flatten-single-column-dma | --no-flatten-single-column-dma] [--keep-builtins [KEEP_BUILTINS]] [--experimental-gpsimd-library [EXPERIMENTAL_GPSIMD_LIBRARY]] + [--internal_dynamic_dma_scratch_size_per_partition [INTERNAL_DYNAMIC_DMA_SCRATCH_SIZE_PER_PARTITION]] + [--internal-allow-rmsnorm-cascaded-reduce | --no-internal-allow-rmsnorm-cascaded-reduce] [--softmax-epsilon [SOFTMAX_EPSILON]] + [--max-dma-duplication [MAX_DMA_DUPLICATION]] [--max-weight-rewrite-permutation [MAX_WEIGHT_REWRITE_PERMUTATION]] + [--log-tiling-bottleneck-info | --no-log-tiling-bottleneck-info] [--inst-count-limit [INST_COUNT_LIMIT]] [--macro-instance-limit [MACRO_INSTANCE_LIMIT]] + [--always-transpose | --no-always-transpose] [--enable-prefetch-block-tensors | --no-enable-prefetch-block-tensors] + [--max-dma-legalization-permutation [MAX_DMA_LEGALIZATION_PERMUTATION]] [--disable-vectorize-dge-dma | --vectorize-dge-dma] + [--eager-tkg-vectorize-dma | --no-eager-tkg-vectorize-dma] [--no-fine-grained-cc-spill | --no-no-fine-grained-cc-spill] + [--layout-complexity-warning-threshold [LAYOUT_COMPLEXITY_WARNING_THRESHOLD]] [--partition const dim candidate threshold [PARTITION CONST DIM CANDIDATE THRESHOLD]] + [--run-layout-viewer | --no-run-layout-viewer] [--non-local-num-loadstores-threshold [NON_LOCAL_NUM_LOADSTORES_THRESHOLD]] + [--disable-degraded-flatten-axes | --no-disable-degraded-flatten-axes] [--use-accurate-reduce-cost-model | --no-use-accurate-reduce-cost-model] + [--visualize-detailed-pag-graph | --no-visualize-detailed-pag-graph] [--visualize-simplified-pag-graph | --no-visualize-simplified-pag-graph] + [--visualize-undecided-cc-graph | --no-visualize-undecided-cc-graph] [--disable-prefer-par-on-non-broadcast | --no-disable-prefer-par-on-non-broadcast] + [--cycle-based-layout-solution-size-threshold [CYCLE_BASED_LAYOUT_SOLUTION_SIZE_THRESHOLD]] + [--split-ucc-tensor-size-threshold-in-bytes [SPLIT_UCC_TENSOR_SIZE_THRESHOLD_IN_BYTES]] [--minimum-legal-par-tripcount [MINIMUM_LEGAL_PAR_TRIPCOUNT]] + [--operator-fution-split-ratio [OPERATOR_FUTION_SPLIT_RATIO]] [--keep-tensor-names | --no-keep-tensor-names] [--show-scalar-values | --no-show-scalar-values] + [--one-tensor-per-line | --no-one-tensor-per-line] [--no-ssa-style | --no-no-ssa-style] [--no-collapse-like-dims | --no-no-collapse-like-dims] + [--keep-offloaded-mem-intrinsics | --no-keep-offloaded-mem-intrinsics] [--no-color-terminal | --no-no-color-terminal] + [--dump-sharding-decision-graph | --no-dump-sharding-decision-graph] [--shard-axes [SHARD_AXES]] + [--experimental-sharding-propagation | --no-experimental-sharding-propagation] [--mem-bound-ratio-for-mm-sharding [MEM_BOUND_RATIO_FOR_MM_SHARDING]] + [--enable-lower-shard-axis-before-fusion | --no-enable-lower-shard-axis-before-fusion] [--enable-nki-attention-kernel | --no-enable-nki-attention-kernel] + [--enable-software-pipelining | --no-enable-software-pipelining] [--internal-lnc-pad-sendrecv | --no-internal-lnc-pad-sendrecv] + [--enable-send-recv-cce | --no-enable-send-recv-cce] [--use-ilp-layout-search | --no-use-ilp-layout-search] + [--set-nki-shard-on-producer-consumer | --no-set-nki-shard-on-producer-consumer] + [--insert-offloaded-transpose-dma-free-threshold [INSERT_OFFLOADED_TRANSPOSE_DMA_FREE_THRESHOLD]] [--enable-cast-in-select | --no-enable-cast-in-select] + [--delinear-contract-dim | --no-delinear-contract-dim] [--vectorize-partitions | --no-vectorize-partitions] + [--internal-disable-double-row-gen3 | --no-internal-disable-double-row-gen3] [--internal-autotune | --no-internal-autotune] + [--internal-autotune-config [INTERNAL_AUTOTUNE_CONFIG]] [--internal-autotune-subprocess [INTERNAL_AUTOTUNE_SUBPROCESS]] + [--internal-autotune-extraction-process [INTERNAL_AUTOTUNE_EXTRACTION_PROCESS]] [--tf-dma-size-in-bytes [TF_DMA_SIZE_IN_BYTES]] + [--tf-low-memory-pressure-threshold [TF_LOW_MEMORY_PRESSURE_THRESHOLD]] [--enable-isl-in-injective-check | --no-enable-isl-in-injective-check] + [--enable-symbolic-memory-pressure-estimation-tf | --no-enable-symbolic-memory-pressure-estimation-tf] + [--allow-ccrank-axis-tritium-fusion | --no-allow-ccrank-axis-tritium-fusion] + [--internal-autotune-tritium-use-more-tripcounts | --no-internal-autotune-tritium-use-more-tripcounts] + [--internal-autotune-tritium-only-with-id [INTERNAL_AUTOTUNE_TRITIUM_ONLY_WITH_ID]] [--vectorize-strided-dma | --no-vectorize-strided-dma] + [--profile-smt | --no-profile-smt] [--number-of-devices [NUMBER_OF_DEVICES]] [--cc-pipeline-tiling-factor [CC_PIPELINE_TILING_FACTOR]] + [--no-cc-pipeline-tiling-for-fsdp | --no-no-cc-pipeline-tiling-for-fsdp] [--cc-pipeline-tiling-for-fsdp-only | --no-cc-pipeline-tiling-for-fsdp-only] + [--experimental-convolution-kernel-match | --no-experimental-convolution-kernel-match] [--disable-inline-cast | --no-disable-inline-cast] + [--disable-affine-select | --no-disable-affine-select] [--profile-memory-pressure | --no-profile-memory-pressure] + [--report-n-lowest-utilization [REPORT_N_LOWEST_UTILIZATION]] [--vectorize-direct-dma | --no-vectorize-direct-dma] + [--log-top-n-latency-dmas [LOG_TOP_N_LATENCY_DMAS]] [--low-psum-usage-threshold [LOW_PSUM_USAGE_THRESHOLD]] + [--warn-parallelism-threshold [WARN_PARALLELISM_THRESHOLD]] [--disable-square-matmul | --no-disable-square-matmul] + [--disable-vector-transpose | --no-disable-vector-transpose] [--disable-software-replication | --no-disable-software-replication] + [--internal-disable-fma-on-ios | --no-internal-disable-fma-on-ios] [--nki-dl | --no-nki-dl] [--disable-tiling-allreduce | --no-disable-tiling-allreduce] + [--annotate-no-spill-hint | --no-annotate-no-spill-hint] [--print-nki | --no-print-nki] [--nki-debug-mode | --no-nki-debug-mode] + [--ccop-bucketing | --no-ccop-bucketing] [--fp32-cast-input-tensors | --no-fp32-cast-input-tensors] [--enable-tritium-loopfusion | --no-enable-tritium-loopfusion] + [--enable-ternary-fission | --no-enable-ternary-fission] [--disable-insert-implicit-shard-axis | --no-disable-insert-implicit-shard-axis] + [--enable-hoist-wlo-all-gather | --no-enable-hoist-wlo-all-gather] [--enable-hoist-fsdp-collectives | --no-enable-hoist-fsdp-collectives] + [--disable-concat-delinearizer | --no-disable-concat-delinearizer] [--enable-aliasing-dependency-verifier | --no-enable-aliasing-dependency-verifier] + [--enable-must-alias-to-iobuffer | --no-enable-must-alias-to-iobuffer] [--disable-partition-locality-tiling | --no-disable-partition-locality-tiling] + [--enable-memory-pressure-driven-loop-fusion | --no-enable-memory-pressure-driven-loop-fusion] [--legalize-tensor-tensor-op | --no-legalize-tensor-tensor-op] + [--layout-transform-heuristic [LAYOUT_TRANSFORM_HEURISTIC]] [--disable-bir-codegen-loadstore | --no-disable-bir-codegen-loadstore] + [--dump-tensorizer-bir-json | --no-dump-tensorizer-bir-json] [--disable-rank-id-rewriting | --no-disable-rank-id-rewriting] + [--vectorization-size [VECTORIZATION_SIZE]] [--atol [ATOL]] [--rtol [RTOL]] [--save-locals | --no-save-locals] + [--no-simplify-before-simulation | --no-no-simplify-before-simulation] [--correct-precision-mode | --no-correct-precision-mode] + [--dont-verify-after-all | --no-dont-verify-after-all] [--disable-debug-info-dump | --no-disable-debug-info-dump] [--run-pass-list [RUN_PASS_LIST]] + [--dump-pass-list [DUMP_PASS_LIST]] [--dump-pass-list-and-exit | --no-dump-pass-list-and-exit] [--print-stats | --no-print-stats] + [--run-simulator-after [RUN_SIMULATOR_AFTER]] [--enable-peephole-inst-combine | --no-enable-peephole-inst-combine] + [--enable-repartitioning | --no-enable-repartitioning] [--no-ccop-barrier | --no-no-ccop-barrier] + [--enable-iobuffer-to-must-alias | --no-enable-iobuffer-to-must-alias] [--custom-script [CUSTOM_SCRIPT]] [--enable-bir-converter [ENABLE_BIR_CONVERTER]] + [--custom-compute [CUSTOM_COMPUTE]] [--enable-bircodegen-unroll [ENABLE_BIRCODEGEN_UNROLL]] [--fuse-param-to-neff | --no-fuse-param-to-neff] + [--only-compile-subgraph [ONLY_COMPILE_SUBGRAPH]] [--model-type-transformer | --no-model-type-transformer] [--model-type-cnn-training | --no-model-type-cnn-training] + [--distribution-type-llm-training | --no-distribution-type-llm-training] [--num-neuroncores-per-sengine [NUM_NEURONCORES_PER_SENGINE]] +neuronx-cc: error: argument --cc-pipeline-tiling-factor: invalid int value: '2--vectorize-strided-dma' diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b9c0683022c9822f27eeadc90ce9a23cd9fdb909 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73521137a0627d1cbcf3276af2044ea2e025b43384d5ba149c1ee9f28e06ae23 +size 88353 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f50b2cf11504f78ce82f970fe655ca35ee86d92a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66d4b1a0323238c3a55ab0dc7f54b938f398ac7b79ed19aed77d248df0ddc12 +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ff70e52348a3e6e3f090e7da0f720a10d3a9355 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e68080a74eeee69a4cca47ffb58ac94d4475079ee317556d3c4985d658030a7 +size 52641 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..15afaa396aa5f5f7fce31d6894ab0e9447fa7497 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1faffbfaeb70a15963aaa15126906cc088054168366d15b32f56ff2235d63f96 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f94150fe135004484c18b0f1fd8f652f905ff5e2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75310b79ede900f855f7d33db734190e3d8b1f8c38de1d620c7660449b8abde +size 195539 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cfd0db6ced9555f4ed2586c5c4bbfe6f088a1528 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91462061b8dd8d78b36005464d30793234a3b8d0d65025605c7a747756879de4 +size 88814 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ce506297fdd0fb18f276fc7e12214ace7408615f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fccf83e1f68a07690baf4dba83c9edb5a4a0dcb4f9b868628e780b46605ac229 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4fe7e3baa2bf15ee32bd3598d83e71bd5ba05c3d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36538c0f0163c51c100fe984419b77741cb531f8398a3964395440434a82cad2 +size 63656 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2992d524d69c73d7d528b7c17fa44d9a08f03d4c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4f53713a26144868fa7cc36691ac32c154c015ab7fb14386dbb5f14e1aa435 +size 318464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2959084c44ba40564baec63edeba3d000e03a90b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c60d97a0eac8ef405f96ada66e7ee30e756a3edeae863d715384e398a52542 +size 78361 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..8a105e5ff4a6c1344b5773d39b8b50ef1e0b7607 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_42997433666d65b8817e+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_42997433666d65b8817e+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2025-09-04T14:21:48Z [XTP002] Number of instructions (7387296) is over the threshold (5000000). Tiling could potentially do a better job. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ee8fcd549dd62c0e5567ae6151b1245b771ac9ae --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a69d231302f1ace099640b628626bd426e927268210c9431a728eefc96c597d +size 77505 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1d8b3904e45054a8157db1857f950534015fe13c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b822e8a1392f8fbcacb0c80b92a975c2aa1f72426ce4d8be100569a4347107 +size 738304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8c5d44a75821f66c4d1e70779371aeaa7a70f1f0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efe5736caa991f079da38009260efd6e5c31311de7842556bd0d0445219dbaa +size 746143 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab2e7fd722ba30909b86ff27822e087e8bb1f7b2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7692c4fb4e0ccf0a62179640aa269c73fe18a81e8e6d2ce7259d3e2b0c120c +size 82265 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..583c5cfcd168decf63b00fd93e5f75cacc67b7ff --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e782164696f8783b28888fa812d4bfadf04ac5d14c7a08f16babaff3eb2014 +size 257024 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ea874d2cd8675252fca892feadb1b174efa66d78 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fe72a4dd31d497f1f07423ff84564d47c98c777313908d0a99609100ed0054 +size 264831 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f4d37cfd39c92551ae3db0b6c2384dafa78de904 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7669149cd154008a58452d032c92b6a5e39fb8724af1c68d7bb14043a0f69a +size 79837 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b9b13e1482d848ea91e8609a8d82054fe4376cbd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615385c54261c4e93035957bb1c78114fb679805c1a1e1ecb17155623f420af2 +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..505b50a9be39a77d4e3ec188c42a88c7b9b18f9f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4c64fc408ef56adc28ff4532af1f34d6161d90c0e7b040e133a46e5216155d +size 223802 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df16b7896b8aa6a1d4656a8eb41451e57c927750 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7c111f2d45ec406435ee09555a8a6ffef6f268b057a8b803504caf1c9d3712 +size 60798 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..11896a872f7a6d87ac29fea80f99959a84b8ec1b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deef48730e90852a4f32f51f4cd98860bf6ec515c182de0c0d4a6c05aed62d47 +size 277504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b8e59ebcdeab97a0a0bb974a740fe679aee9302 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4826eb7ecb62de311b6673fc7eb91ab6a0e787590e7562a704a4b85a57cfd8 +size 286418 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2ecfa4f37eb969dc7b4c147656db597768d23e4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998dba5e3120e80b6651d5327fe758a558ed33d91945dcdb1d846d70cfec0915 +size 80969 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..046e01ca7a9b76578d21b0c3b778e6bd8abafe47 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655713ff2edb98a57641f5f0f6f2b33f1f50cfd6ee5e52d099cf7893f34a1c38 +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b17e8b0031fd8af682c7bfada4b3c1c4d8def6fe --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61843a6cac41be6a991c88265006855d123fe541334c7dcc87b2c28e127addda +size 81649 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86908f3f32b8de45b8d425d61af82836717e7408 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cdbe3ed90c5ff1392117606b148e2eb5df617c19f79d3b8ea6f655734024a71 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d6b7d69716b60b6158b40193960e810f5241db30 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02b2b247292777264ae0659d1943a3d0f43a1eb646fe6775277833e08f10440 +size 254591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61087b466dbcbedcbcaf8690982037fe18c29eba --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7897935bc5758d372827d0e0c19b8df2763b52adeb53f436b100581b7f13249d +size 54371 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ffecf071a5253d8545a4880b1b13d20661cb453 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b220925e5a8ad257990da53a5da69fccccd356c282300ffa11384c37579b32e +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6236c1914a83c71749472448f39484204a05f824 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224ae4658301bf1fe984f513469223007447321a4298e11617c7306a275f0221 +size 80455 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b03044eebabfb169be570f39486dadd377a2f945 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aba88a2f73d240b22d60db4385f4a56847faa84b25b258dbea541a3920f2e35 +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..289ffff77e7a6a026a5c2fa23c02001fd8b3572c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e84e9e9052add0c2e6ec077c69738ac0a387e69bdfe82c96ac1458386dac0c +size 234042 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c4524b604d502dbe676c2ccb3852a8945aa0756 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4225a0b484c5969d90e132b177d1a3c18ca6e81fa4f0f4e3009da5845a8a9278 +size 89589 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e575c98c7de3c4fb62242863ac0f7fafccc9101 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a7c903e2c000edc930e5c166c4c079a146d852bc044fb0ffddaa49b3a85402 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..07acf91a252b4060bd7befeb050110864d6e584e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4c2279119f709d38ff739df0c2d9bfba73dd25c6f3511305180565fbc3e4b8 +size 247185 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d4e34e71c6046ef9853c1ec2d1795b858bccc7bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a03cd0fd441692bbb65d7b13b869cf6feb1a37804c77d29866b9b458a2342a +size 213344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..05065ad9640205a0a739834ee8d9d5731ddead7f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135d137b79ddd84f519811620a99ae786a8c81b939fb614626c9d018761f1cea +size 420864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3f5c888bfc02d8d34c67cfd0352b40f96b280c68 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b78643a150fb95e6783be77452d355a39021ef66872e666b849586b20f155d +size 529164 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..0100770fb1fccf7145e9dbb78987180c4de59875 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_ae633976c414e74f7634+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_ae633976c414e74f7634+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-09-04T15:05:14Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..570583495d9603016c378cf431b27e1c6374bdc9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb7af878e983eeaf53fe336828b65513b338117f32e6a785c58b4c35483c2c7 +size 86197 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb365009c059307b342165df8d7fef76f4fef468 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2fdb90306fdf97e0e55dc88c46263d346f49162a24c62d49d4d686a94de959 +size 287744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..34b9a2635cf4bce024582d7a44d1de8752be8896 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b9e8632d8c4814ca31e16553a0d0420a50add19d49466131c423a3605733f5 +size 381519 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4236e5f7e57a77c02223a7a866e519392aec43d1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7681e715dacfe94e1fc50eb60c4642052dc4563245fe1fb12c57efea0ee85d +size 4496384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b79fa720a111db9f3b7ba393bd90655d7ce41e20 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb9fd1d8c10870af816eeeb55b3cbd46ef931c5f8dda186d41c5c286c9f762d +size 574638 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c284eb28fb9f036fcac256ed27ab18e0f8859554 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbcd798f37abc6a8268a2a1f14eecaf3d19e56eb675b0598332b6732449a6cb8 +size 2274304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..34ea007f076e20228d4c35dbcaf81d9e789cd4d3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a344c50dd82aa23a12f89e8d87445a5b501098df08358b753193ede7b3ad9880 +size 2419673 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b3c9589c0abe241d40f836ffebf2a3109a41296 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7744f2ef00b6491200a3a5c2d547b91933aecd109db7f32d64173c4a97b1168f +size 84746 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6fa5fb899229880f621acd3720e1a4440172214 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b1600273651746901f3e77aeecc3fd6b858e011b9ecb8cda8a0663bbb10940 +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c245e7208d2108a71e5bb6ca41b292deea77c933 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d80bb8d9f2fd92756c59f63ac64013cdc4907df7fe79274b2721a07f8dbda8 +size 203386 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83075f716a63635ea1e2996401e5dcd936e8c420 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f00d9c754221630c0756817a73d25093082e96eabcec1123942f4dd55ed5135 +size 85319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..582b3488d883e1327d1b8e027d02b8138af25aef --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55af098df4713b79462e382fdd0a062bac1dbfa92b2d34f284717a8c752891e5 +size 656384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82a000aeb4b140eab5d71473b6b2bc88d42f0794 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c7c2fe70801778bea0059d8be2d176f6b09d9aa4d1f397501aa040c4aa784a +size 55086 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87811d5e9f627c8150a89f47f8f53e81d33633b7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e4a783e394c23eff12cdfb0ab2528d1dc5a5316372ab6855be9ff69aac6718 +size 297984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f95449bb517172ab424c31a43efb67d32269bc46 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7b37dc069928dc2acf901981dc71daf9b77f17e73a157694ea7a9de6d5ebf7 +size 306898 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a84ef60cf10899aa251166dd7eb1d72c9c9ed23 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c94bbc7e5a7df25410746ca641181b447c82e7e6de4291a835d87afa5f865e +size 223215 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..21ba2343aa1dd4f33ad12442f15b3ba17b7031a1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008087529aa5bda5a1930323fe5d4e8fabc273cf06e89eb67c0a86662ad00913 +size 1260544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07e548a46ceed8bc38b348b3b86fca6bf630a010 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f864a5f78da4217ef4473095af4e74c2b102f99f76eaac6eeac946980e7d5081 +size 84132 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c91f3858eaed7d93bd0822e9c6f81586d8c8582 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e7b4fb835f650ed3c0c391802124236278c0a48c38ca890d0d306421412297 +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..162aaf9b2ce3ddf13a8f21673456b24998270e2a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fb0c41b6d33e8847f0a04d94c222a606175a978c597cc07cfb8a2d6c5259c7 +size 203386 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d94ce1eb0d46dccf1a4a030a61cc6277b6c70ce4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad3a0a73d8c5a5e97b58c00f43a0e4cf8f51574498072c78d85d11b968dc65a +size 81847 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c4a9ad43bee6247a6aabfeafc998720f244162b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955f16aeeb09758db2e7324e69bf73044bb67c40824f7cb04c5635a97d069418 +size 277504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8aa63e74d19e10951a30189d4eb26a9459b3e65 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaca6db5b2aaec16623396ec57a3d587e61e1c2ec39288b62860818841185db +size 84464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d8e57ae12a5c86c85836343073946935140ac875 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2edcc8193e72d555485823bcc1bf7d8ef5f25f6d860adf55807ff3c7c28a29 +size 226304