diff --git a/.gitattributes b/.gitattributes index 311fa051ad70901b6fb0932ceb42e999b45cba9c..5b5de49c6082d33a4480308f025b563830ba779a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3820,3 +3820,63 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/wrapped_neff neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/199a909e91264c5be485.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/199a909e91264c5be485.json new file mode 100644 index 0000000000000000000000000000000000000000..f74c92b2f37046bd22b3591bee5c0063fae1f582 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/199a909e91264c5be485.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd6196fdf65916b9e7e.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd6196fdf65916b9e7e.json new file mode 100644 index 0000000000000000000000000000000000000000..e7975667b74d4c9ffb4dd02add64463218336eab --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd6196fdf65916b9e7e.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/da9c5303a7a8399f4728.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/da9c5303a7a8399f4728.json new file mode 100644 index 0000000000000000000000000000000000000000..2f2a9146bd625d1e0376e0a224cd3a030f0aad9a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/da9c5303a7a8399f4728.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/ibm-granite/granite-3.1-2b-instruct/8754088ebe447523faa5.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/ibm-granite/granite-3.1-2b-instruct/8754088ebe447523faa5.json new file mode 100644 index 0000000000000000000000000000000000000000..b21d5e01d4fdd01f82a0882e8b89a9ba466cc2b8 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/granite/ibm-granite/granite-3.1-2b-instruct/8754088ebe447523faa5.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/2215dc8042463633d3f0.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/2215dc8042463633d3f0.json new file mode 100644 index 0000000000000000000000000000000000000000..c803e6df5f379cbcd32174783da42d9f9a6496d7 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/2215dc8042463633d3f0.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/b376023c81bfa4abe720.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/b376023c81bfa4abe720.json new file mode 100644 index 0000000000000000000000000000000000000000..117aa11d4fd2e326165c90842bbfc1153b5c95bc --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/b376023c81bfa4abe720.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f251731c5ce664058141.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f251731c5ce664058141.json new file mode 100644 index 0000000000000000000000000000000000000000..070c0090838fce660d84a2e1bbe32b232925cb6e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/llamafactory/tiny-random-Llama-3/f251731c5ce664058141.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/58701e526f964300dd41.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/58701e526f964300dd41.json new file mode 100644 index 0000000000000000000000000000000000000000..30badb1ec8e534621ff9b47f9e6543ea886c5223 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/58701e526f964300dd41.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/a1ddea966fd8dcd09abe.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/a1ddea966fd8dcd09abe.json new file mode 100644 index 0000000000000000000000000000000000000000..f6445e481f988625472dc198b82868e028f3762f --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/a1ddea966fd8dcd09abe.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/bf9a0ad012138efbed67.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/bf9a0ad012138efbed67.json new file mode 100644 index 0000000000000000000000000000000000000000..4cd5c575a7fde018723e9b335ebb0864aac44838 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/bf9a0ad012138efbed67.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/c0840c79130939a5b5d7.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/c0840c79130939a5b5d7.json new file mode 100644 index 0000000000000000000000000000000000000000..3d168a6a5eb2a42bf358da68a53864f8c20d9d69 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/mixtral/dacorvo/Mixtral-tiny/c0840c79130939a5b5d7.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/b4357f21aca6e501d91c.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/b4357f21aca6e501d91c.json new file mode 100644 index 0000000000000000000000000000000000000000..885dba0e3866c7f118f3314ebb101d597d37ae8c --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/b4357f21aca6e501d91c.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e465629af396fe635f76.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e465629af396fe635f76.json new file mode 100644 index 0000000000000000000000000000000000000000..0a52bdc14b8c21b56378f89e60984a5f7156e155 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e465629af396fe635f76.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e761ab5a54cc018c1cf1.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e761ab5a54cc018c1cf1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d4e3670f1131355b37a4dbbae4ad5eaad4e3793 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/phi3/yujiepan/phi-4-tiny-random/e761ab5a54cc018c1cf1.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/57adbf1ede3e707d0897.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/57adbf1ede3e707d0897.json new file mode 100644 index 0000000000000000000000000000000000000000..16c617c1e94c7a4d2f92be7983db14cef1054877 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/57adbf1ede3e707d0897.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/193df09e6732fbbb116c.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/193df09e6732fbbb116c.json new file mode 100644 index 0000000000000000000000000000000000000000..004d8dc5f32f00f1ddad8c9f9a4c185b6b58dde5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/193df09e6732fbbb116c.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7c0dda1c917359c9b9d9.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7c0dda1c917359c9b9d9.json new file mode 100644 index 0000000000000000000000000000000000000000..5923ab08c45c406e291057670a9979a71b462444 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7c0dda1c917359c9b9d9.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/c2488ca5e3b35076fab4.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/c2488ca5e3b35076fab4.json new file mode 100644 index 0000000000000000000000000000000000000000..2b2fa05f34a0a6d4793b3dd36880d400bdc32c42 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/c2488ca5e3b35076fab4.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de762b4227665b1b0fe5c02b7594bc021217f9ac --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2407e2272d3d144dbf410618284b0f2c0f2663b358d5958bc4343a79b1717945 +size 81899 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..72ed0a733a0687d91c9501abf89d3d0ff0d68afb --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_05fa2bbe33c465fa6028+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27eaaca8b2bcbc0ad2b82118de895ed5ef25f6aef54272664ad56dcece9fea5d +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f36bd63eca451aea489c8c49fa3423e23bb0da0e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09bc195566c41e9d9f79bcb7ff575d29e270b3f183325ed7b9cb5c5c05369605 +size 82456 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5fa74a8534d97a7e90ba13018b7d417b32e28bc3 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0d1769ff0e8722ea7bd3+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b904f1a0e5ca3bbf73c9e84e696fa4d2f2c7bff8e8e23caef0b01b4ba92330 +size 420864 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..50ff659f624119d07a5cb880a166fe19fa008b89 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc564c78c9e2a9c67927b13cfeb427f1755a039f1efbb3fc65845e0b6ac57df +size 84609 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..60c8a56576c8a040c321e9903ad51ff2a520b1f5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63cf5899873bccda25078727cddbe0a964348d337fd6ca871dbb54257bd8fb7b +size 185344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..52a8c2f316c5aa1a4eae0a2b51591f7d085cc098 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_0f186c2f1fd5ea8194e6+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee7cadb30ac2bd4f7d0b08a8c7c3490e4d801cce2b8aecbb7d21eae6e3e9eaf +size 193114 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d4efb6a62361bb4fca80ece8c5e3b792ea9bfd3 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ec8adc0e62d42819e2e31a593d500e4ecc872b5b66fad58c6413dcecc02964 +size 10362 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3690ef9c41eb09eff83288bf65d79591b67936a3 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_147e4bf1152204dd48de+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1ab58f32c71f1d32dee441b486f37f11dc3898e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9301197ab622d6cf7fc9b997b77ddb90e143075decfbd981455569f9c3a19328 +size 7011 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb03284e72913aac7802720ae2c60bc5a55a4be5 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_159ba669f1f1a100fca9+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8508b0621281903c9e232c5a5e5db55139cf2c83 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f79e1d9744e31374274c65877eb1317f14db8f679e3c660305986a7cf9f00b +size 7011 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f88926865612ed3bbe2902632fde2fe3520a34ac Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_166cc88b643075ffce7a+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59eead8f3c81d758cfbcedfd85fa90ca5634f11d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7a7cc9263f9d584bf705c9853864643e3b8f22eff563f9c4c24a66b3aa5909 +size 86249 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..095a8e9c1daaa98f8b8f63d1408cd0d73e2a2025 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1b2bdeeb4f1ee675f073+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5f3dfc773b1978f0f0b4d1dc52ae97b93513822eeb4f2ba104147119fdddb0 +size 328704 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d9e5d1b7df70a593f6b715d7c00cf009a7c74aac --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e723308dcc002feed145e81b4fcf3f7c64f510f4733e9d8609f0851facaac131 +size 719500 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ac199ebb207fd6b041d1b5c5be66a3f3c20b8e0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_1e73a5b5a7e84e57a4f3+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700b24a5d8e1426fd0118190e91cf26b0592e48aa524c02cb6bf01373136db03 +size 19835904 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4605a22359149ac635ff27123b48254020d03bb5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde49921fcf4817188afb754cd994ada6c7cd7d1de560521a0f5118e78f13ee7 +size 81635 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd03c3b5f7e68b0a80fc01cda924c3c01185edb7 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_20adb6f2584126fb54d9+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373b8742bb3fbda5f56a4bb56ebd9f9d4cfb656734898e9010f7cda50b7bba34 +size 359424 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..87a7f560a1401367b829db28c819338574d4f4e1 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2d152e44bc81ce51bf546c60ba8892711cf017eada4900a25633d1fae7f6ea +size 1071223 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b4a1fd4d8dd088c7e9f4c104c0c3e455dade2d4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea405aee7727d3d3a1930cc6f23e916a6592cffb23b40bdd72ac5fea26565a8 +size 4998144 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..60deea72b142f377c156ad811188eac434795151 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_24d3a1081355a38b4dfa+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71f8ad1864ea0a2b240ca0845db288867b5ae146c2c4b982556304f3334b7136 +size 5171427 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..96f0f665e40bbd914c5329bae8672a3bc9702f12 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bf8708e5b7b7b674bc3f8b59d8de802d7ba0a01f9e0c21b15c2507bb6d8fb0 +size 149912 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83ac8086b2b91bf31f0a3181e2e2d715dbab07c9 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_44605f8e819b2927ae51+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38da02efd0a9f026e79cb0e5ededdc9cc35dbf9ce6916c0456bfd2f4445341c4 +size 594944 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5838ca8be53444b5efaa2a10a6f9afd10bee2e2e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b84b98799f319ea2dcb578ec9b21634dcbb00c7da8593652d086b62332d3c60 +size 80936 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d557fd7563eae73d625ca477a9967e371ad072d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3fd834b9341d069c36aaced5e1f03d2be84334e3eda3e040f1a231cf193da5 +size 216064 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..729166f91ebb4da70c9abaf028dc4a842e7f5cdd --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_453c6a682d4d9520fb9b+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ec42382fa023b036a33e9914aaf296bfbc3a8d0e09980ce92006e16dc19efb +size 223770 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ae7bce4b8de7fcf7df0e28aedaefdb16eb1550ba --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f196cbe94f9c97c1afffe024a295b1068a437a746942c52182ea0899d11e109d +size 68831 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cc521c0352a334bb151cc8ca4748bbc828b0c3ac --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac30304ce9f6e8650b90a0cef1f999a06fe6dd37f16b099a181969382b89c31 +size 246784 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..559a04694f955153c14bf585922f45b4ae3a71ba --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_4ac4f5ffa7821caa2254+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e6a3d481687ee70eea4b94c5555537e59a02ad8057131299e0e09b8120b4569 +size 258114 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d6dc87730378724ade2978019f8627f10eb5538b --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc47e151efab0ffcced51d3891a69009c0dfe3477673b4e5c6eb656f45cb5835 +size 11183 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec9d21eb25428b62fa7d7a712fac067ba72c57fb --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_52132a4b6612ff8172ff+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d1158b6483f180bc98ccf32e5efc775a7a4d3f0de2c324cf588775cd5379b1 +size 103424 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8bdfa8f10f4272ba533188ce415d2ac766ed0f0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c3827ac22789e5936c6f5c8b7ea753bf8e648434a7eba29b553fb91a6b425a +size 85395 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1453652eefe64f55317071385675d806203fb43e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5292629f22de8a25d49f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41c7b81671f8affefc136370a12a8b66ddaff158187f2e0555ed909bee43143 +size 205824 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9a2e980aff2440a892cc8caef0bb1b2f04dc4b43 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90460a3951fb3fb92f159e322d4410d84c975043323d8ddf47fb66e109cc0c52 +size 10362 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b4c15a9b644dfc6bc47bef844705e94af8ed93ee Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_58ab7f9c00b2f71fce7d+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b6bdc9b3f6c2cb319581f335e4fc003bfe1552f1 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e822cc0103b48f977d51f21735919ceff3c97ad01acc5e6b5736a90686d2b0a +size 7099 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81112c12a4ae8592e01314a797c21596649bb401 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5a91fbbc20691ff4a085+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8749225112fc2d76745ffec313c5bddb776558bd --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b1810a85b477159ec50853ddc6fa78f6733b299efd680b6a81f1efe5245bb7 +size 1038025 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..edc1017ff9df652fca82c71aef05c66cbc7d2e66 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_5ed208cee0f650dc941b+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4c013fe3a32691f8becad830a64b0d3fad72936b6ef3ae929f652e4b6e3a15 +size 6626304 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef3d90f5a744f918110f912ff4a203eab9339d5a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0e11d078e0a581a18530a2309af3e6cb541b86b7e4ca9504b506823baea88f +size 11227 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7610cd348941468ad73ccb71a5a41d14bf42f1d3 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_609761744325080627cc+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a0b952d0f86de09d545781469a3b655698929cf1f369c25e4e55d2fd68dfb5 +size 103424 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cba62cab43c64a0a818e95234ec0cabbdebfb35c --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a72acc446351426d56c9a074c5b7a399c70c0fa8e2eea867147cf6948aee0b5f +size 84496 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ab54b8f4e7acaa2230197abbe64a28070c7b0fa7 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_6842d32efdc3bf8cfa8f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17cbd35d54c3772889c1e2994d7c597be842554c06e91702ce007894da8663bf +size 185344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7c308076ec0c2506ec966793b60006de13e7d18a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfb51e710460e91b7533bfd0965a5d523dba32842d9ea691a027684440b1343 +size 68829 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3563540339e82d72b306a3acf10685f5ea9ab9c --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7bd62d45a50a3e4c46b3d1aee0ff1d2bb4e6aedf319bc59a8ca5328340ee135 +size 246784 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8d2307f9aa4cc3ce6f32202342d0102895e185aa --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7af77f8c231b0912b481+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36e1d7b7060edcc330b337ce78dd4f25850364e27f4ab80ac28285e47f5f512 +size 258114 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9f2ee4d700a969ed3795c8bbf3fc1ad86001e134 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e694186b8469797b5bcc03e3f78ebdcd72a19f7c6f144f55bd4cec92364460 +size 88361 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df4e2d4534994a4399cca8840ed72a3e09e6c990 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_7e32ca18bcec0bcbbb95+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748557e23b7390fd4b933c4fe104cc3bf2e634f2367bbcefd88bf351fdc7d199 +size 246784 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..114314c7371becabcf66fee984c28bd571135943 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5ad419914b68795a8b029db9b53461821a8e1e0f99a36a109147da1aa92b4c +size 84609 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a4267347aee295e5cc83341f67e38a17f8936025 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fa0d4ca0c6a4ff9311b6c76e1b0e46ab22945a50ddcb8f90d5ac182217b674 +size 185344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b8078fe1ef16184a5d6d2d61a0563af8aaedd57e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_86263628d1daf779ec21+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72099868f887afd58aa7172d32cc2a8a6ce780836d0410be67fb9ac87ef2d492 +size 193114 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d4745d9887c00bff0cfe8f638daa4164aab71574 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5d3b13091c62603e3d3f4a1f5e634cc1c5dc27b36c3c0ea3dc266958edddc6 +size 80318 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92109f874caa7d00bbc2158691416782da391ad2 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1026c716f561d4daca04c32fe0ef24064f6b34a1188721280f27d8d442044589 +size 216064 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4b004c5e05a72beac792e9dd9716b08899026b4c --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_89d62b3e9349d4bbd3ca+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430563c9457fbd17d6484dc3ce75ad43edeba9f7050bac3946aa6c9f267a808a +size 223770 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bec2328da91b8f13cd09e0d5bf5990470b6547b5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f763f9ce8a6a8b5133c6fba58fddccb37e85fc8d21ad8561417f84f3c09577a1 +size 79983 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..88fef791c5cfaaf1c8209d48028985f3955f2181 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f36547327806f086c2ff86930e79aeb97fe4dd2a10449f167703f34a00d9d0a +size 308224 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1096fca9deb9f3cb3c608bfb49c4be4702deb455 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8c2a6b120271c37fee40+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36d3174f754d9b27d3d22ca7ded21e66ae3ebd8aadef591d8d01725f8eef44f +size 319522 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f955a352ec6e7663227481b95106eb156f13cc20 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c93eaa947432a5b64ebbfff5ae026f96a545410d476c1ae78aecc9bb02864fd +size 82126 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..34983c2c48cdffd1fc5d16b5a77d3f4f8828f108 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020b94e62f0e866fe482ed38b75b0c25c11c78cac1678c404d659d27d52c26ab +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bc8ba78f6d3c66ab438fdb6affdb1b863c748046 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_8d3d2ef7196de2006c25+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a34a8d129f89c03a613cac5eb20304f6eef8de15b6f2e7f341314bdf0078d53 +size 244319 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0063d1e7b40e03b200d3525f37f835d0cef47a44 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc46c05376f00dcddb7d727fd8f617bc1aec6d53cf8fc1b8743c6e312169ee0 +size 582901 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b15c49db4ce16a557cd50cce90aa6cec82107c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9eb1aa8ecec0bf868fced67bebf77caf7484483d405ec54e4bb29ccd5df27df +size 1444864 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f289e16180f02a3b263d2f382566c58676abb586 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_915f412a8437431eb2b2+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2ce738c6e24c660b3778ddf7de4146d9e0657dc7173cce071bc02f88ac9ff0 +size 1589775 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e8d7727b0511a350878469d52667f508aa8d6e5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a19e4faeb0ab97b7be4e7f34fba5b4bf0937fc70ceb6cd406a20798082ec49 +size 7099 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b36bbb70e6f15e037abfeb347998b9554626f794 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_926d768d7b13631649b0+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fc51e5a0bba1f448b3ab72f81b3fee7090761fa --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19df62d98437637ec01b5d587ef608078ab295d9a02af329f9ac6b37b35df617 +size 81045 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86d6cfb0f7701e3e833ad9ca817c536a02c04ee2 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_95ed1498cf7e68cf7bb6+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4244ecd5fba817eb84432ae7b305d3855510078a42bd07c4be03535ec4409646 +size 205824 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47fc059ae1997876c549a6eb37b1b0ec42b1ffdf --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16531d760e00ccb36e70a9a77d23a377a0deec739a0cb0616c3f449d7352a90f +size 7106 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f01b6f2905ade6a4186e2b057c26265914ea80c Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_978ff2f41e9d7d312ffb+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a385f51ec93783ebfccc02bc4ef8865b40ef8be5 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1c18742fbb646d4a81924eef4eece4618c7ad8f1338a434846434eb796d5c5 +size 7106 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fd14874cbd4eeef2dcaa3fee9d367ee35be95286 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_98bd7b89e29b70d5fe2d+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de375ebf9b2836349598abda502358f0c457beed --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3348f48d22d70762f49f32da613f73ad7e78cc9d748a3df3e740027909ceedf8 +size 82126 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..60c12a0a31d6e48eaa818acc3926ca3968f63709 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9ea0c8ab8bfa30c166540a2436b384d483e4132f9acb96d1c5e7f9a3fd9a54 +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..64dc0df229296f4f2b8c8278df194926f8d8f237 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a106bf8578a514513a7e+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec08a7d5395957bf9d6598404811d49c5d93cad5536d61c8b457085f6a6bde7 +size 244319 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68aa30e43cfbef54a9968bc017dc327c2d7d2bce --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee70ecbac589b9d810beecbc33403025e0a508fbd6e1da5a4a39e153dae8c79 +size 10010 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e0fae4099a296e0cde6751e05fcdccb362254ae8 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_a27ed69f0e6d777f54c3+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..215ceee44f6c8246f3858198b667ecfd5c8ce77a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90f1141f141d555ff85ac76424ace7524dafdba4da6c73df4fe85678793a82e +size 90014 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3e2067df4ee00088d37a87e84bbfc439bb8df31 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbf5c032eab78bb0bc8f4a724fa91c7aef3571a8a20404bc702203dae64cfba +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..191c2c1242b6831aeedd13b932e0a712aa6f2a60 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ad5dc858d958ddb16d68+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68cc55e94f9bfbca0b5614402ae552ce33e92771079cf293b4cbb9b197aea73 +size 247153 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0abb03e6ba3f71a70a8824f68651776088c5d896 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc5b9644aac04a3d73164009caaa0f9efa04f8817148b75a22e9702911e0e6b0 +size 52490 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4e397e2a8c819a9c290ff671c6844005568ce9b9 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c407e52d503c4b99711f1bcab92bde0f178fed2ac44e8de9b449f2936c514a0f +size 185344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e01e6c4c9d40cd3fcc0584223b4533c48b2ba79d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fe616084884188251a4aa014dd9301b5295a07a64717ab133545498b9a808b +size 195507 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e24a625b547ce14ececd84d8ce73388ef0ab5ee3 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97745a3bb0a82f0b13d01eaafaece09b5d1732dcc5cf078c1c3b894d6521bbbf +size 90014 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f1cdc1de89cfda015980cd5092dda3843ed82d0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e5ebaf2a18ff71b4675d04269698f65b780ff3e4aea5fd059311576255d56c +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..30619923300cbe451d61912a9c557c47374e96df --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b51d6989e2180ec3eb00+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b95c4d61b0d6778d110926936815da2adb39dbfbab64c5cc1a319623b5cc45 +size 247153 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..50c8be29be77bc1f4eb50ebc640973e70255df40 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233ff5dc3c165e897e91070156fe9600ac8006d5fb231f8db0bab44709239ed8 +size 80318 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..770df12142e2d79d72242c3a6d08762c44db74ea --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4633e08bbb516e7dda7e71cb2093ed31337ec85dbf53f6daec15d4deb1d239a6 +size 216064 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..39580b9fa220051bb17dd40a3592aa8441f472e2 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5e92b6409d6f348b3ec+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fb1192159df3aaae88cb03f02ca0bbfc1b2f7812c33b3f5128d62bc2eb85bc +size 223770 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a45ac96b068b4f734e5b95c5c1a70e2b0421b53 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952dfdff0640134652e8907767e25827418409d6741f69a15c42ced982ac907f +size 7099 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26e4428215bb2fd43dc31c748fa4eedb6e494fc8 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b5f9b5a41f86f2f51494+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef61cd473e7f26fa2813bf12f7b07f697c4294e7 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28891312cbdd8e30477b23dcb8202def79dd76ee1f953e8b336a794c11880f98 +size 85223 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..77f075bc5361ef6845c1e8bf4eee36b96bdc8276 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedf092fb72b129d7473edc3bdae36fc65f3798bf62ecba4002f7f62f375bdfd +size 195584 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..969103758408be44316c9f5e5e30f5c7cdc49a4b --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_b9d2b010664e6041e48d+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6f3d90a1919a4e963140ea48e702cf540d53fc72b96c982b5d0d81b992c6d8 +size 203354 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..783ad8346c6246a9a890ee3898346daf2f94df8e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af58822b701d7f1dde2c7abbd68e78b895b4c5b1fa371fcd341fab21fb4a5a46 +size 11227 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e12d8d672f0e4a76add4e39b351b0b00d45c06f --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bc97181c1125ffd58a3c+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a848936dac0572f338606af1ac841d605b32e5c3b2195ffc36414edb7cd3d7a +size 103424 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff00987ee182c554d9ff86d8fc2bae220524ca62 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270799a68428bbba6689e3ec0f5bc39ba3102e97812e7e7f240050b4597f4944 +size 7011 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd72c2d90507046ee3fbfb2ffdd49d93b66f3ef3 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_bfade17f28cf5d22a20d+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bb1cbef93a8e4abd38a09b85936f2c29327b758c --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb20f368381418114380d2a44ff7c035f66037c66ecb0527da70018a238ee97 +size 498691 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f276b443c2f48b10c43c91a939ecefc054efa6ed --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c195f103ee78f704e567+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90fbb9c56465b58d9535d220850eb378ea0afc03fe28119396db8858b3be9533 +size 31345664 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..310496a50eecd13e41d6cf2799c2b954ea5ac3c9 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a04a7c172166070e264081ef4f2460627f3a5dc73f08e7b1e2e8e17694d70e +size 88361 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fc35de9bf1821192ea1a832a10c61dda97025631 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c5100fad76df191ec03f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ea924a46c35494ca2255809d5da3d1f6f774623cc10a393853afba68d62577 +size 246784 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1d7eea721411b9291c24145a2bdacf089d2fe3e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908804136f505f843c6e6ab387a2d51d433c430cf8fb423349ac638deedfd94d +size 81045 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..42aa631523f10b7099f006fe838729e3872e9511 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c7460d110b4eeeb97af0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fbd3860be4203d5281731b584e6fe24e0a3ca43b628a4852a70878ce470bb4c +size 205824 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9414b423f8f71040a55a3df61b0deda4dcc84734 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_c839c3566b4b3f8ea4d7+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1943f64b30894833e161d8a25cc8d07ae6cf53294309b10e8c9311978be447f +size 690846 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b46a0202f2245ea5d0eb7edf578867ffc2dfc5dd --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c454b59206e26963451e0e80eca796c697e09f26206f2b7e9bec24b30d02beb1 +size 7106 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9c313b5ead4c69ddbe0e1f0280ccc3037cd2bf8e Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d632718eb53a829b8a93+ae6a382b/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..037db8f3b64fd34f1240769ced467ca8406778e3 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04305cc8082842f2e3066a7e5262e14b2dc3d56ef656b4f8cb04086c209e86a4 +size 88846 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..622a27e6dd8676b912a042ae3fa824f9ab21b7ae --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d9c5983d78b6c1eaa296+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18015badf47c031bacd4735b27140ddf93e8b2af33feff616d79bef781cb968 +size 277504 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3e4a6dc666c1db565b84508941fd2e1002b3f512 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f2e3b0e2487337b55e222ac243ae4100ff815ca01fac3b9439df2ee7c0b7ee +size 81633 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..09ebd55ad865151966ba2268ac79a204086b5d9d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dbcaadcca1c9b6971929+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d77b88d74e4a27fefe2fa057fb3287e90983895f14a0c6ac8c2e58df3ea153 +size 359424 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0e071c428901c82149c64f5a0ab5e93d19bdc77 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc7ab7fff7654bb11970ca0cc6688f7a2da4ea6dc52bbfefd42281730ecbdfd +size 84496 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cdd22bc0820611a699c2ae718313dc7fce9b5ffe --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_dcbd37ecd8c47f0d8d55+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a68fc6345e351a33643aebf9ad02f5d592b5545d3b59f9fd944cf6a05d4948 +size 185344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..703af4677abaf785d6db4b596a2d18d64d36a349 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe51f1efb35171f6821d77652888749cf030561a49dc2c78b793a78e7f151a1 +size 82742 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2eba869ec16cf5b0614c1e1b594488f208eb49f4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27091617e48165f1b800314c75408dda288159b856e646a7251994bc5b3bbfd +size 246784 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dcb16b8806b563761cf6e8039f4d8c4a4033d081 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e009dac316c847139d59+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e515ab0b2b6fa71090ea305381f61b144c5ff67111306cc736a5a7459920563b +size 254559 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3427c940284277e6cdb28c4829ded7e20ccfa88e --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddba6d99d7e6d34b168557baa5e8d2883855a2bec08b321ff82d3c1febd38e75 +size 81899 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..917a165e17a3df063165bf2e4aa042362e27eb77 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e2a6181df5beb66d0222+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1874e75d417a39abc075a8289094bf8f18652a1cf0cb0600f04823b34a0414b2 +size 236544 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d268b8a6a28ee3050f0e7b19c82f48a850d482b1 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739b05cc1395ecff11016f22b307852bb2a298a9deaaa5ed33c0d45a44bfd64a +size 417664 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c212b53a55ef5d1153a46621ec982a470a505b08 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8971b379520002ef01dc7f2b311ea6a5f57046102b9cbcf47d5c218a2d505454 +size 2233344 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e5361844e4a6001bcd56b4b58d59b4a176cb43d1 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e828a9ab6bbbe87f2110+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5172a093e680c68f4ac58a4fa4af207e23f703aafe578503e5971d8c318e2543 +size 2303088 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..526d8cd4ad5647654b30199c725dd1ade08cb25d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23873515e9b3ac41346ff4633bbb3792ee6d0b87624a52ef58e92011057e3277 +size 172535 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db19d45fd41090a34f94395bc4ce76a35051aa0a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e9ce27504f605841dd3d+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279071e0c2164da714c6292e68b9f6b321791c90e8c50580c82f0d905eaed150 +size 2274304 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7539c269ad0e3b11fbb6ce9d59c51bc35b6cd5d4 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3558630044fcefc978fddd3670ed5380e112b1048af13f8b1ccb06e35459559 +size 53803 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d410de1d88705a415d9eb4483c8f317c4db05abb --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2bdc1b417ac1db2ddc6c2bc208490cc6d744567fb35288fdf92701dd5f211eb +size 164864 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..724769fc90ef6314ff1aa48b9a44659535acf902 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3fd1239d4a7e5bac5779b18d56f5cdc43984799d --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153c4c8e3aeb51a8152f2031e8b400b02c3f08e93552c13a796d877d91bc7349 +size 69051 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..432806b62d6d300fb0e682a4fde45b2c8001b405 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_ff351d57db88c064c02a+ae6a382b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2c9e7ec92111886930f8b9b3cfda099b67549836eee69ddf2a0480b73f05c0 +size 1168384