diff --git a/.gitattributes b/.gitattributes index 8a61bc64c85cea27bc4e0e86b3c6e1f7e17cada9..4b614bd02b35ee07ec8729ebbfc4ceade0c8f6da 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3737,3 +3737,42 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ba2c4b7abceeb84f9017+a9d440f5/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_ba2c4b7abceeb84f9017+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_e389728de68772abbfd8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_e389728de68772abbfd8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/594723eef94c88d8d691.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/594723eef94c88d8d691.json new file mode 100644 index 0000000000000000000000000000000000000000..06995d0e6893da99ffce03dfb03b7d4e481b592b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/594723eef94c88d8d691.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/77a7a8d68edf4a4fa67c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/77a7a8d68edf4a4fa67c.json new file mode 100644 index 0000000000000000000000000000000000000000..78b4e49aa0fbbad389687974f77084d778dde209 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/77a7a8d68edf4a4fa67c.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/7eb46f495d3f76a82dd2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/7eb46f495d3f76a82dd2.json new file mode 100644 index 0000000000000000000000000000000000000000..3f1d18b73e44f967e515142a73d26b4d298f57cd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/7eb46f495d3f76a82dd2.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/1b7021104f86fad69b36.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/1b7021104f86fad69b36.json new file mode 100644 index 0000000000000000000000000000000000000000..eed2ab08664507691a70fc1c9403da912008e799 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/1b7021104f86fad69b36.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 48, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 48, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/4019b7e23d39b9aba408.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/4019b7e23d39b9aba408.json new file mode 100644 index 0000000000000000000000000000000000000000..51f41a921d056fc704b644387f184ad36d8431c1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/4019b7e23d39b9aba408.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/6f655fd89586def868d5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/6f655fd89586def868d5.json new file mode 100644 index 0000000000000000000000000000000000000000..07e98ab434e83379baf03a9b20e496bdac8ef7b0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/6f655fd89586def868d5.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/a2a4fca0e3525d8a4cfe.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/a2a4fca0e3525d8a4cfe.json new file mode 100644 index 0000000000000000000000000000000000000000..c3a9e73ad24a743b89e378c9b5dc3ec473400083 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/a2a4fca0e3525d8a4cfe.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/bb61aa189e8d2f842903.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/bb61aa189e8d2f842903.json new file mode 100644 index 0000000000000000000000000000000000000000..9613a87b7eaac7cb6982e898fb16f0b0092ad605 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/bb61aa189e8d2f842903.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/e297942c5e043e02020c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/e297942c5e043e02020c.json new file mode 100644 index 0000000000000000000000000000000000000000..abf296f4183f8479e3f680d0b08ced79eb71af1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.1-8B-Instruct/e297942c5e043e02020c.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/2da687c92e59a23117ae.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/2da687c92e59a23117ae.json new file mode 100644 index 0000000000000000000000000000000000000000..12576a47f99b970d571206d012c8f41f81693a24 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/2da687c92e59a23117ae.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/6e4e8f5f8aa1aa3c89d2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/6e4e8f5f8aa1aa3c89d2.json new file mode 100644 index 0000000000000000000000000000000000000000..34b81dddb0ba38c4fe987d0f01df7c1c557647cd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/6e4e8f5f8aa1aa3c89d2.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/a32fb314d1a0a7235f26.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/a32fb314d1a0a7235f26.json new file mode 100644 index 0000000000000000000000000000000000000000..f710ee548185b9cdd1ea7035dd1baa2a872bdec1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/a32fb314d1a0a7235f26.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3c39e1075ddbdb83600e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3c39e1075ddbdb83600e.json new file mode 100644 index 0000000000000000000000000000000000000000..eee9b4f4aee59d669c583da8513dbe4e24c9e2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3c39e1075ddbdb83600e.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c5cd6e33996e562804a9.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c5cd6e33996e562804a9.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f9306be310ea61e14851db9ca449a204b74b56 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c5cd6e33996e562804a9.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/cc4cf4add3a1d0bf7d48.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/cc4cf4add3a1d0bf7d48.json new file mode 100644 index 0000000000000000000000000000000000000000..2f0b764d563babfe86e20eaae7b2f763f680100a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/cc4cf4add3a1d0bf7d48.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/747de80de70e6add4ff7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/747de80de70e6add4ff7.json new file mode 100644 index 0000000000000000000000000000000000000000..023e8cfc2902f9e6375ad45c12e9c3fafc7df339 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/747de80de70e6add4ff7.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/e4a7ada90932e8434f45.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/e4a7ada90932e8434f45.json new file mode 100644 index 0000000000000000000000000000000000000000..02c2834b3a480d9fa47d4def84146ac4926183d6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/e4a7ada90932e8434f45.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f958e14fa70c14154501.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f958e14fa70c14154501.json new file mode 100644 index 0000000000000000000000000000000000000000..b88b8cbdd8d43650fa9db15d9f0508c1ab793d16 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f958e14fa70c14154501.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/92fe77cc5ba75666c842.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/92fe77cc5ba75666c842.json new file mode 100644 index 0000000000000000000000000000000000000000..8c1f0ff59561b0be24984fdcf800a219ae944417 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/92fe77cc5ba75666c842.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/13860b2213768201ef02.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/13860b2213768201ef02.json new file mode 100644 index 0000000000000000000000000000000000000000..78e501f49d05b2a2542131b73b04ac5183af9b7d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/13860b2213768201ef02.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/7b5654249c5375ceac81.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/7b5654249c5375ceac81.json new file mode 100644 index 0000000000000000000000000000000000000000..11311589f85aab924fe593187836e9be6904b2d2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/7b5654249c5375ceac81.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/ba1c5664b347ffab7c80.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/ba1c5664b347ffab7c80.json new file mode 100644 index 0000000000000000000000000000000000000000..d7caeafd323f63e05df65753dbea52a2555c09db --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/ba1c5664b347ffab7c80.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/2424e0086cccb9177782.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/2424e0086cccb9177782.json new file mode 100644 index 0000000000000000000000000000000000000000..2bcf4646876dc286f7b74238d52015c0d26d9b84 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/2424e0086cccb9177782.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f27a9cc01514c39106c6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f27a9cc01514c39106c6.json new file mode 100644 index 0000000000000000000000000000000000000000..c9a33c58e2132e82db63a522f97f176239141091 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f27a9cc01514c39106c6.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f500c292922dac64fbd5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f500c292922dac64fbd5.json new file mode 100644 index 0000000000000000000000000000000000000000..b1e7088ed65e6bf042f2f4ecf18a917e0f52cf63 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/f500c292922dac64fbd5.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/ba3c8d2cd42ff03e26ae.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/ba3c8d2cd42ff03e26ae.json new file mode 100644 index 0000000000000000000000000000000000000000..dd4380e301a74274e01e4667730ae2cd879fcbde --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/ba3c8d2cd42ff03e26ae.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/bb3997795f0545f61fc7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/bb3997795f0545f61fc7.json new file mode 100644 index 0000000000000000000000000000000000000000..32e780ac1021112a8cfdcdb566f74321931c82c6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/bb3997795f0545f61fc7.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cd35f3be7d730903197c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cd35f3be7d730903197c.json new file mode 100644 index 0000000000000000000000000000000000000000..dee078669ecfbaa4a24419f83f6076d5c6146bec --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cd35f3be7d730903197c.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afabf1ced8c28ad300df426497004e3c98c006b1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da8362d592b1ece40c894b218c48d8b57d8ea17f047008d50eddfe1703c7497 +size 878243 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bdd053641c3a7100eae1babe504a779cc0f69895 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13eab2ef5d1e6beaf6c4+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7313dccb05e09253e16e337c8d6b2b013f29ef9090dde628b70d5271398a6ac8 +size 31038464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd48c0dd4f41d56df02b92c6ad5741d4c075968b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0926e3974d52dbffa21af62f73c9074b928be94daabe87276a0ea69e98176b +size 1933833 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1bf312a8bb36ea590f7dc3785829d4e773de65c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9b4909f875eef73664d1ca5e15b3efa17dbbb731ab29e612082ca465ffd47c +size 6749184 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..41cdb0d1fbfed503986ca7a959571067e682daea --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ce2001d89cd05108a83+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2425fc93d6eeb2f04f2077e7e19532545f383409ebb89b85ec8c2e68c06da909 +size 7092759 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d646ecc4a6fe09bdcda19e1750d258b5f5e24a91 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cd1aa5555f89054ee55c9463dfa9042e5b8418febc18c5fef40933610b0966 +size 2155864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..39e3663efc09bf4b60146899c986140d29a6969c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_208b5cdec9945742e55f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bd8c3c95a11f096a651276eff0495fbc50a2753ca98494da49fd8436d57174 +size 3482624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..951d37fb82f3d57b31e1492d5fe5e71c38154a9b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90bf2fc56a6966a428240c312793a054c852b553ceb04e2b18ac05d47169e0fd +size 948667 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b476a0f4511714c404d6b01db2fcbd3d5b0bb73 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2dd18a2e64377ca22821+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a18cf768b9ef7fc1b28e4b2505733a2edc108b7f806bd38658c2a15a66f2185 +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7ec5ee70887291dacab3882bfeb1876da5e8b003 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1ddda1c7ca2bfbfc9a52ab7c276d3705b18fd4d5d0700e83d2f12acd4e8e49 +size 948667 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12b1714b0e86fcbb79ba394cac67e48fdb9e8fc3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_36d3f7e81b08baeeac3e+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e1e95cfd69fd062468f0cfd8f1a0e6f9d902cd08b379039a3d6ef18ab1c441 +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94c42f3a0c8fef987b723d674e3ac5fbca555223 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4228544c13324f06d635a7d3b6e7c7aea5d183aa1347b0b2e1f16b6fe4fdd553 +size 1934251 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b4e413b599437d84e12c9972c54b2999bb3fd6cf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea45bfa49a8aa7dff0c7d1426a0df5b64d269bb8262b424de3b6616e11851f4 +size 8244224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..024776d7a8dd3faa83506b58a21b8d044bdade43 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5651710429bf3052ff3d+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a150873277b059acc470582ff739f0a5b8036759ff6980cf3a13a1c7c35cf1da +size 8587799 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8ff605888cbb1d204902f977c5ed09441aa22b83 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83df338cdc7457f1d6308f111ba6aa1ea7e0dc6b23119e11eedf265dd1eeb26f +size 1707884 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.log new file mode 100644 index 0000000000000000000000000000000000000000..db8dc02d72bc4a0c825e994d40107af2420cbdec --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5c299e5c9c9400c82b3d+a9d440f5/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_5c299e5c9c9400c82b3d+a9d440f5.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_5c299e5c9c9400c82b3d+a9d440f5.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: [XCG815] Estimated peak HBM usage (18.607450) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-07-16T15:21:24Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (18.607450) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0f638fb6fe8ef95045332419954061baebf4f79c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a103163401d13a16f1794d9b092dbbdcafafd914e56913d247ab33ee78fdda72 +size 780488 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b27ce779603da1a458348fe7ac264e745837b749 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62e4632f1b2a753d71339e6643a5ae156dcdbacfca9b78447dc0cddc861edce +size 2018304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3dd0f524ed405d740eac374a62f89fa9db15bd02 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6012fbda641585414270+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d48ab95aead3a1709e68f5bd1b8470638b36f5a6b3196058d90e0d67be26fa3 +size 2156630 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e5f9ffa2e9a9eca2c920448069b40defcc59a848 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e436c34cd40a3fed0a29da65b33ef535ff980ef76e430519ee6b554119e26f +size 782445 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..adba544444ff9876d26f85d13da6187b3a431934 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18ca203ebe166577a89b93118ac4f0bf8d4ce5d113b16aa36edcde22bc1cec8 +size 6308864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3e7ad7e35afdd2d090f63ccff4e2ffb6c23b4670 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_64421a46c0af9b9626f9+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b468f6d7be4cb406ea9d2b54293f2605ce6c968c36ed7d007223f511e55f6736 +size 6446681 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b791e821bc76d5bc32ddd92cf7279911b0e82419 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b37bb0d7cd3082fa645c76418164e31a3799b87ae8f9df388bab5cd71966cb +size 782045 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8fd433fefca7e87e21b3b56af85c6be991351a89 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8012bcc8e972960eaf5aa97b7cdce2bee9617f3e462aee41181e1ba289ba7a +size 2509824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..afb82eb610f65ef0d8d3c7661060f533de84777b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6b1226d9e5f977723885+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbee8d05b687721b5bd1b80cabacd5980c18e2abc3bb6c96ee77de99bac967a +size 2647641 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f59f4b3aabbbd3961f0f0df9f1f8112e29429b29 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd5032b3a78c3ce711d7c1bf6fc9031ccb729ed5b54dac02e32cc5cbc19236e +size 339944 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e64c3f90ef652924cabd9a8dc93d28ffb922a62e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6bfc8fb51c997f4542bf+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfc023c0701e0604640e256742873dcd2d49dd8daa90b323623b779b3d8d4a3 +size 7711744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ca955274195e2033cabbeeceba1343c46568c558 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e64967b51c4f0946d60d8c36ca2d511fe86dc8a2fdcc04b3828f0cd893a666e +size 947655 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b654c192a63ad69860727936fe82f06faee8bc8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7aef449af2b112d5d3d1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a33f4318fad3d8e3b04771d07855bfa78bb014d488a9153b72ba8c2971e6fcb +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f36cfd8165164bdca33ada03e4d7e627b7b851da --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d6d050187e527a2a06c486b8325b7a096c717442f0c9d7ef5fce5d14904992 +size 948667 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5c4fd741f8e13c61592e36f5a4f11803a295ef1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ff072ec147149da3f87+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c23d40015e83c11a418651da9814b3baa171d2874644907137cf97547b23a9dd +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8c280f12c3dda1575acba3f054f79a61da65aede --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651b6bfc87a6d5ec28f06552f87f62076ddd9959ca495cda042c18aae0a47cae +size 1934251 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1bce4a9bad409b9bcc3574f3bbf0afdec6de0e7a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81feb771d86ac35cc0792bc58ecd59192db306985a5ea55cfcf1843e48efd4e7 +size 10015744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..56e6f9208356e6fed5ea18752e741a1e011a8f42 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93a90f1d91237eb02666+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e4a34c46a18a13c9efa96ae0eeb3654f1d102877c7e96bee631c648a06e639 +size 10359319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7d29faf1d2ced040c23f6d7e9be725023e9268ac --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10459a16f15ddcfe0d280c0a2647c552db26b7c912b8a95427a668e2a7654ba7 +size 948667 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83bc5c31e51f60f87b4aa650c0b57b014081091f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9abf85f80857baabf8f4+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be0bdcfaa7924da7c216470589dd4aa409e18992f3753f4914f85f33530a1e0 +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7102fe03a4926ac35a870d8af2ae9ff03463391 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74942bfd45ae5405122aa65217409fab89262455d5c325c40cb5281513540a97 +size 782061 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b7d78c9efd761a42ef4aac48b5053dd8fe595db9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46adae8cf02fdc759b4eaeb1dbb6cf9e090a7d3b42f08f59d30c63230c94df01 +size 3871744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..88c7839162df339ab24db23c7e68efaa90cc6b8d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e8e2d5376637a6d92d5+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b709c37951caf6997578a91fbc8599b5ec279f573ca171e45e2dba4d1290ac0 +size 4009561 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59cfa4959f3f8f666dba2637be1a7e8e58f0628d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab29b3de9d90b403ba94eccaa0990eb73401f44ef4f3b8e6bbd779f09ae9490c +size 2339736 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe030bb4f7f778167a35d9fe632d70d33a945eed --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f8d06795f4a60e23ed6+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4524129761cab114d29167ba825dfb42f91f45a70e8358cf724cde23b18053 +size 3369984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7a2f950c2e945de6d7bf7ea7ddc4d27c1c6bb8de --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0418c785001a5bc234c3f0ae6d34a64d7a827b3d9d2bba51223ea0779171af4 +size 781501 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..94a0c787ed47912e580d2cef18e67df2b8b21f4d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3913acc13889843ff991d8cedcc465883cee90a78a9c5da0575ea5e503df64d +size 2018304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ed004df7e4df2c72b6c785f5792e564bdaae7c25 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3bc27efedbb16c4e6f4+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f747e26c22735d536d9eddd56dc8a9f96a3e4d2484e6fd7d6a2ec2cb703ed68e +size 2156630 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ca458cfafa0e84f87cf2980aadfbf76bf639cf08 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abdbdbc1620d76c7bd6548c411930872c55678228f8c0aee25051f740ef06dad +size 339944 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ef4c97f1e80133c16aa872c7c604fb83524361a1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b286062886c7bdd42943+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5b00efd65783d0fcd458774c8fa13a4113fcdfa0c965f3cdad457bc23d4579 +size 8182784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c8f7d50c440fa2f6d20be6ef6632a17f6c701c1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605378dad095de97929520a3e05cbf0fdc7073033c31b6fcfb5d71dd9fb7f419 +size 136009 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4db0ed3358f67c028edfc0c52e81c772ecdd93f1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b53472984f7e303e1632+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d0e374f256e16f62e8ec6580a964915cb886c4c6b4cd5f40bc7139aeb1e6d1 +size 2192384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..66c65bf8e1c044abed4475448eefaa82b939d8ff --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b743626245ce26ee197f7e5a2c8a8ea76d9cf72f8b734f4b1a7aec1fc344b8c3 +size 136713 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b9f7956084061a322ce4561c5bf7c2842d8764bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b769276aabe4a0ee1e27+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47edfbf357809d042c2e479e004842c2bf40716623fc3ea70acaa7b462ae6657 +size 2315264 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e4d26417921820ced2da06da53414fbabe61f9b3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311496c523da3487b596fc4785ab938da1e8a4f6b820058c20e43747cadbf139 +size 2339736 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2cb7e301d6fc916771d6b5e62f1eb6fed759a7b1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8d95a51f9d39a75095c+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edc46ad29e8eac4ed5c14b713ebf22afa007ca2094a3235bddb6ff41e8d1368 +size 3369984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..423e7f16caba5cf058b24a3244e9778c6c1f09bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5206a1bad2346052d4f793a63576cb03ad9b028f6df5d8ca2c7ffba77862f112 +size 782445 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..28bf9f52371ea281a6ecead138b27da876cac0c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512622d1409fa7400118812f09f3fd153b70389bd413350a227b82d0208b8cad +size 4946944 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..48145d2b54f048df44cc8bc3533590d72871f07e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccb289fbd616d79a48c7+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ca8269b71b318426855b4b49c63d81a86ec12919c2837157154901478bc8cf +size 5084761 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8994f040cb3ebf929778aef9bad07258dc659e75 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fdeb8382c45968bead125fc2013ef1cf10f71d32671a7317f0eaf0c37fbe66 +size 911949 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8cfcaf4f354cb7c5bfb262976f2531e625ad18bb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d79f6b08254760c9c964+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995a365824000b3f636080eaef0e6a04ce5e6228d7f01d2a1e768cd72ae16cb9 +size 31376384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aff88ed5f7f762758727393b7051ec1498b6e081 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59337fbf8282614d09e1660450e096f245338721c44cc9f5a2f7d408a24a8ed +size 879255 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca00738a38380bfc0af94cac3a5d1dbf35d07ce7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7a5d87af37fabf21231+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b242f332e808dfe039e25c87c9c62b0e51f92b7811a7697889aeb56de7dd49d +size 31038464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83758cefbaa97a28e96484723ee72fdc4b98d127 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52149cacc63749da7500470cf7eacbcee0d2946b96c879e1a77787a202431bc +size 1707884 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.log new file mode 100644 index 0000000000000000000000000000000000000000..103451a78cb9fd85c1dd7d70accf48ff042a3605 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_de64611fce1932fd8904+a9d440f5/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_de64611fce1932fd8904+a9d440f5.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_de64611fce1932fd8904+a9d440f5.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: [XCG815] Estimated peak HBM usage (20.507986) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-07-16T15:09:01Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (20.507986) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..465ba7219b1abb3bb70f2194c3ceda4f39d305f4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dff7f8358012a512e9ce+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd8cb49884b234076d92a0622978efbacafb0807b7da9019c334e20f2f154b7 +size 1926971 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4acbe331f8a8152c09b5ffc6e58e5e5046516654 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a756f85e977c00c4a594dfa3f6bc8c6454796ecbfb39a619c952ca98ed22129a +size 948667 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..809fec09f188134d84d0f4f272ae90faa9dc4e4d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2e447c621bbe7fdec98+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa0af1fc6d79cf1fb06c92c1ae7aabc51710eda6a669143b60305719a4d2c18 +size 31130624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..60fcb2dc4954f37fe22dc19dca259f076f5f0b5c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03253c0327da54a941c42a4361a9503b1ec498bed1ab603739d58c35285b4088 +size 781430 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..23459fd68c20c7f2788094beb5f4e2b7f8caa887 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6141d61f1782189fcfc3027708551938bb2bf53bb2a1522953c63e193bef68f4 +size 6308864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d668d6471e537da1a17f420d824240538902de30 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f4ac592299175634c9c3+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f7b0c7758d6bc830e532cd0114c6dac4bffd2d12f857df0243dd52a2af6085 +size 6446681 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..73711cbab1d2740d60683aa7ed0ce1317212f597 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7224eccbe1058e5a47fe0eba39ba233f4357e766a58a93da13005cf6aad563dc +size 1707884 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e8d01953eea24dadc469cac6bf567d2ec0bf0b19 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe38d1e1ce28581b10fd+a9d440f5/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_fe38d1e1ce28581b10fd+a9d440f5.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_fe38d1e1ce28581b10fd+a9d440f5.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: [XCG815] Estimated peak HBM usage (16.708521) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-07-16T15:39:18Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (16.708521) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bc76b618c72a0f391906ae21d6a1d984cad7486 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1739fc005391cf0fc53cedf30cc1e6f87a2d09602ca192452244c5f754587365 +size 136713 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e906806fc4e034b64947c4fdfcc980cae2e5dce --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fed92cf5d3e2df66c98f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb69f7b869b0d4951dcd914b031221ee3f45b9cf2fdfe6b18daa50eb7860381 +size 2315264 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd1414aa0bd16e875415527f1ffe18512eb69395 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25283536e68183d4c9892cebcc99ddd12a74039b2dd9be627aa694af3705724f +size 782045 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df610828d47d67b878e980f6fb988bc5a316554f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e2cea1471530a8ca5df068bc4942790bdf09d8d4a212dac25cba0e0dae0d62 +size 3144704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..674c72eafe6c00ea9c5f313071955eca3c44576d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ff2ab5c1ff0b12304ea6+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0f5e9b0d158de49366b5e881f0c266fbf44d3d899cdf03b1ade976eb54ddab +size 3282521