diff --git a/.gitattributes b/.gitattributes index 33951935783236655a9343838c40fec3954167df..30dc1a6d4c5d27d2fa878098c5bad1d539b7ad9e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2299,3 +2299,19 @@ neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json new file mode 100644 index 0000000000000000000000000000000000000000..00f37b39070604c599e7f84b5d8a66d296a841c2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 1, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json new file mode 100644 index 0000000000000000000000000000000000000000..5cabd82efb84d5e0fcd591d8276b5615264f5c34 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 1, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json new file mode 100644 index 0000000000000000000000000000000000000000..53b9a8e6d4bf97e8891cc092df98161955e8b483 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 2, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 2, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json new file mode 100644 index 0000000000000000000000000000000000000000..c0ecae3aca95f6d474125537a85d0719e083708f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 1, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json new file mode 100644 index 0000000000000000000000000000000000000000..bc0980857d8726571921e874572072ab5fa5ce37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ctx_batch_size": 4, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 4, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f7368c5fc23301824858e79bd10efb84445403e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa4617fa284fce823ef66375cfde6242eebf08b02012e074f5a5f3abbb31599 +size 55270 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3bddd9446cdeb976c3fe5b7e9eec5c26da5b336a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7087ea495089609f4e18abc94cdec01e15f84a0cc008b5d70d4e23bfe968d715 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2186badaef207ffe80b527f64672c3e5aad8fccc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7f5ec5cf29ef3ef9fc74f449b06126c8bc9ed57b004e2d547eb41b0bc986a5 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80e5fd0c6e35f930db709d7c6a6dba30459704b3 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0aaf9d8a6c4a89d481df+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef776a3ae5fbed24538c890545ce1e3577e1828f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6483360d51a8a440933536a3b2de8eb29088522cc6cea6ee66e10ba7fb84eab5 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2cbbb682783522cb65fd5fea9a6670723707c596 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..97d71dc73963f060a36dda85859e01f1ac5fa646 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bb058e8d8c8149275a9adba004933b467e65aa6f7cf60864e61cf4e111fcc8 +size 58120 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..421580a151c5e906d60095d31df0ab47dfa803be --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cad33c7d01fc95e9c4afee863df51b6aff289c1be150552fc3e58aafb0c716a +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2bbed7bce167c32b79d06ae5c765a53833d8b8b6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200387ad9a20e142a4a94b7d4417e2dee103fc729d8e279d9904b3b5de037d01 +size 275039 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94717f3f13e0712666c09f4cbebdb7bb4347701b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5729f0e5e6817a418f67370e35db5e09e9d2ef35095e3eebe473f96bc3b22505 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e4c2f9774bc572279acac9bb6a96fc2b91cab9f Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_65a30e1d0ae793144e41+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bcd3dae1eb9dd48ef04542ffb80368f16b6ce403 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3559f6e56481bd849489099945950b4cd0e1ea5a63c95778aa69f414e58ca20 +size 55866 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb93bba511f8aa4062be2e11e20cd494190c7d03 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2fc7d49ee934c70e8062c621a4f715094507dbb7f7c8da8a9d3477c45b32ec +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..88f71e18dc0baaf312d8879a86d2fb8746b3fb7d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0ebb84466177f1eb5105390edd847097e8ae08efa07ed94c1ca4e00f5b2e1b +size 55270 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f08830d214dd0480682bca5fc860d0297bb0a9f6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce307477efd035cacbab0ee149782bac1dac310214cdaec2cd0013898150d92f +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7123af2f93cae8966eae7cb286c1f0bea14c382c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fcd521fe2fe34465485c945901503af18888bd8cf7628b9ee03b33d096efabc +size 65457 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aff7f3b88d4d6757b93d757f3b38faf20684e06b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a3738511b748888ed3db+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70de9e6f0b94f58cab304fb487a16a3057f9736d7428f9d9c0577bbfe686efb0 +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8b23fab3d1a654fc977223e4793d4053587f5023 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8706ff9ebc6cbda4cd0bfaeb9badd4c4c2551ddb0e01ded252607c40fa6bf241 +size 429764 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5dd8fecb73c67bb2be3707cb23d8dd93d689c6b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d84624a9deab3d606e7cf166c891110470889dd995128188df6b46d00ac872 +size 24515584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..06accc9c5ff798f4530f932058f3721e4e3e8f76 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe649ea431b500751c457fd7ab93001853239f58cdb76eb3987c9afd8748827a +size 394018 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..141b1571ae80580c409e02ce44d03697063e17f4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd11d6ccf0169cb1e1f16b822821d73b27ad954f53fc65f12817f716e6127c7 +size 4332544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0360456b49c946bf475c132918629171b96b88e2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36600189fa6b5c8bb488dc96e38237c3aa521b40c7dd71a518975ce0ececb244 +size 4402415 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f3a91f015e8533b9b611f8f73d644ced241bd6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07822f0ef5f77dcdf7bc1a73632a04b81876feb6485190c3d908a84688569da +size 69058 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..90beab8be1342bb0e686edd522bd78a534e0ff60 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48eec55201680072dad719edb1b789358b8028a8e3ad1b13998987aad9385ec5 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c6ba173442e3618c13d8186f2c450cfee045dab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0569f3a641913a8553a9e73ae662da99532bec69e2ec79a0208ecfb478ae786 +size 57479 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dfc7fcab66d6137fbf60a7a2c1a2c45a2a1e9b6a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b47be23f9ec52311d836ba8c81ee7f34cbdb88006a619bd4e3254ba942fc808 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..54e5b830931423c3335df6ba0322794fa49224ee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95736247cb7341459857775f1f4f97d4adf0b6511ed49370a403fba0a1b4855a +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a17c714e583544af0f5f16c1807616eb2ead967 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30360da067ec244788b37a03e0074e725241b5704b03412f893d53ecb7f98883 +size 57479 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c09bb72f75411fa288fb22a23309370c1ea4f035 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385f22ae6aefb0889d0160457c29cefa59ff5f746a3123add0137777c373764c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9370095f311385316f809ca4c134009ef5e9687d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fd557962db74d647b1d5362625a12b3248ab4a468132733a4f4e2a51b7b813 +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..01ff7165cb710e023841bf03e57cc21509c3c8cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229df84817e88f2c50c971b89ee564389919edea7b5adb3fcc66e99fc0d612c4 +size 57669 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b47706049627b0aeaac3db29f07cd5086b004869 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2821b04a848a7b32a10f7b175d4644fecd80a8bfd35bfc6d4d4024f20f56ed60 +size 472064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dc1f9b923d632c93384325364976a72c93b0e4c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1513f177c7c72fb7fe16996c4601a7c85d866f76dfcaa9ddb24a0ae322c8df7 +size 479903 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..505fa3507914323e2fba09c45c0eeae49294785e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38797959349de7e57bbd566b3d27770746f3f23b4cc8ffbfbeaa6afd0d5385de +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..efc63dac52fb0d56d97c4f975cdaa12135efdec9 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff differ