diff --git a/.gitattributes b/.gitattributes index f9eae1bc33708141b33233193c9de64f024d4473..6258c299e6bb952632c1e70d096d62c14b6d4a41 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3022,3 +3022,78 @@ neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_d84f59502564753faaf1+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json new file mode 100644 index 0000000000000000000000000000000000000000..0aec4766869e27a9d7e325c11ded5bc4719f15d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e33a0b528098aa1de20a5c6ef47f68185ff6f6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json new file mode 100644 index 0000000000000000000000000000000000000000..2ed4bbbe004aabc578e6aacbfb90b8894fd35683 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json new file mode 100644 index 0000000000000000000000000000000000000000..83d9d035bbbd882bafa5a83c36fa382d406a5649 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json new file mode 100644 index 0000000000000000000000000000000000000000..5bcd9cc3ba289b63a355d97b2902e5946e2c547e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json new file mode 100644 index 0000000000000000000000000000000000000000..94280ab241953e8ee41eea5696b2a323dfe60111 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json new file mode 100644 index 0000000000000000000000000000000000000000..8295f5730c425f6792402975af5efaaaed0fa6ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbc3eac28b34130a54e22178e39d4bcc7698fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json new file mode 100644 index 0000000000000000000000000000000000000000..40e356156195f024a84c928d5a71338c7efb695d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json new file mode 100644 index 0000000000000000000000000000000000000000..f974a74db59886b4f746e5025a150a95bcd1d1d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json new file mode 100644 index 0000000000000000000000000000000000000000..c3e410477517b4dfc804c32715b0a88df964dcc1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json new file mode 100644 index 0000000000000000000000000000000000000000..71da86815cb70f248396abeea8d175652b2dfaa4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d70aa60f81f148020768aeaf290bc8ee0707a21b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe405f977ba3d02427fbc57c6e26c42d54a7107829fb44a5b2c93cd232639bc +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1dd04df56e99633178d7e0cb98aeecfec2a43583 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea7b337842ad8f8189070452b9ab593aeb9b7e4f0f94420722650c1c8eb2853 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..80f8860c574aa37721bdd96e25501ee8c5d5bbdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d070b3828aa5b82e01a08ab35b4ad3fc4fe55e3b2a7038d36b8b071f5c43f0b +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46f295524e7a9f8b3fb6dcb73ce7f3535a237194 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ecaba55fb149b84609a0fcda804b09a672d6f769b4dbba251cc65c06fb1ffa +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..23a853315ef8a152168971e0614a77f904a50c1e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9635c307d1d4f32af90134a1f3c72119e6a300e86eb5828f374c9092ad311bf +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ef1bce54d951dce3ca686e792c6713a8c2a2d6dc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df55c6ef9103600cb43b5d4160809987f5681dc20c9a91d0e4b5b1c9c56442de +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c85de92a248a02d84c467a85557369c81279837a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a43f8bc843e3648d0edd9e5ce2e5d6bff4eefa4bfd320c09064611eefe4799 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d713af28496e1cdaa9983c85aeb46bae0830100 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d390dafe8fbb260d63d5ec2050bd5e1599ad0abde1ed9b95f82b8e3761a8930 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..775f337fc4c70b9be4139a6c12c478419439e45b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea4e42db654ecf8f6c4bb716bfc8c97aab96ad2977ccbe3d40bb8a498734859 +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac17e9f6652613272c35300a4932d559cad63a09 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba3cd4ac3324f4b36c4953f5b1acb31a61a6566e91a96aac24cce071aa02e49 +size 120433 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ed1e80374c94743d6e5f4a157d5f6519f27b5d94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f4d837a54ef7c757455e6a04e635189834f0692b26cd6f5fae305a1acfee41 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..72d4bc7d310a5b63dbcbc92a9b8063ac76aabb35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21ec9075554f54b62b487e36f84d09e3bd0b0b5ff9b5feadbe62ec18f259299 +size 193010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..619a65842b2ffc48ba20f589dcc8bc9879805968 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b022dbe3118ee82874ce21f77ed663ca595c5a2581b72ea6e2e9566efd79f748 +size 170200 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae13e0afdecee38bf24591968f79a3f4923336e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aac636da500fe56a6f42934c4d48cf4f4c82c5baafa2364d29b7ee1439ead79 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..612cabdf1f243c6e3af44181cb3cee24c6e28403 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3acaf672bce0cf0f241b88be8135f0bca7b2cb0662e214d3480cd833690b70 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..34ea2b7be93b8e31055d0fd5bfce8bab64882789 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bacdeeef300c5c2d0e96727f7fa21475aa2b6423e02bbdeb9a14676b19ec8195 +size 143137 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b06fda2664af8c4bc693274ad12a9245323cd786 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d8b80a8c94b7dbe95af2e17930caf5f111fde2354654744ee0802bc22c9988 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6805760afc02a2cd5497a72df5aef4bcf05d973a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4988382c18c5913bd1d53fa95ab5491cce3fead13881bc3fb5e82bc186578f +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2dfc9e84d051f93581a365c3a69a8a8bcb2a369 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a707c38f55510f08cc4791c50f8cbd9ad09725f95e8452a5ba2b64c6bcb1f67 +size 112633 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0e39296f988ee211d9d8290fd1467a335cc9f4d0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd924bca325e89e065fdc0bdf20698ef7921237c82218668160464b433b3fe3 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a0997fc6dea40f63e76f38a65e9305f420d6b14 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c995d029f5e6638ba2c5b3ba53974db125901dd91d08b036492ec788838a860 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2280c026ff31c3c721352f413b97650d2c6f440d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699c332e9b4e3a30d3c13f7d463f314e86432089b6081068f6c1bb903332c2c0 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d70db4cee13e5afa91e2543518a0ab643e4fcafd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dee360f558901db6900c8d561db2945dcbef7feb990479f630016494f80fc2d +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b3d44cb31f591666ecab33d6df40cf35928859e9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9aedabec1b9544b48b353e2a649d4029e73996ae0381582b2035ad33d17024 +size 833960 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de4a40c5e46fea76ae401acffc49d6849dea8c07 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799d6c2e202f01a8cf15fc7e0e795d1ce5618d204549943c0daa79586b1ac09a +size 32103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad29d3a5941cfb376b35d4a5183e402c138fb699 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ea35f87ba50a601a1ae0efe18ff3e43303263bfbfad87867d741509d3dec6 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0ec751be8c9c39f707a897e4ebe1f2379621071e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b237fb6ce83ee0c2fc85cd3ee4e6fdf0e6c95f731520b402e0721c63facc39 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bb0b120bff742c4ce614760a13603b3ae8c63d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d61fbae86b10dd4e6cf69425f1150b9b764adb3a04f29eab1f0ee31dc18f1c1 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..610234eecae5adb5d685f47ccd06156122760593 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b788954a3ac03a763f2c1a6abe75cc69b038eb4838ab71fa49c38aceb156595 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..807ec0d3cdbde8179a51bbac7f8124feb76db4b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9699a4e8777c6749ca1b08e97ee2988e45d0c0564a56ebfbde8dbae695b870 +size 175002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b3f2756a3557a814dac45acd7593f4416dc2c5d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f384e9e62bf76d9e2a2a0d17225b8ec3b0a10cf597959c0e79792442cd5e4d +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5fcb7dc7d3b27ef62bad829b3132d2c7275fe7f1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd0c20afae665a8c066e0018140480d3f50b1069a17f201ee7820bb2ac56c7c +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d32bbdd4f7a5360a24bf2aff036c20fb3df19ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885f72461238da9773ec54f9077526c61272e9168da7e56bb3714173bbfd247f +size 189416 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f259c978846ea13f189f62a61e296f9b16bf398 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a57b155a9bc0950a11fd4958b21e8f04e2005f9e6fc5acfa44cb2839078dd59 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bb25794e8a3f66eee4d39a35701f5ba09bb1ecc2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07386a196931d31078b1e2df77fac94f7836e56fd8157ceba41755b17a782462 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a3151e16035bb377efcc98c9e86327e3df98403e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa08a2197418946b128018941e30011676c3fcf15e6e0e9d001191e50975203 +size 217277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c5ef86b5a6cfa94045cb6e63aea5032a3b32cbf2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818181934eb62f57d81f84e9e61ec5bd71204c948e995f09ffd21d6bf80afdb7 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f49fcfad1163e734c145c2ceb69ec19733b7554 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f345bcdf3b91a6a4e1a747855943ee57d1861dd9a3c0cca95b9bce913fc3153 +size 120433 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1845c1c3ff59da967f1dd202d350f10bd763e133 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa327af0bd70b02e867e3e87943a23e672dc704f9c52a2343068ad6388a712c3 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..838fe83162d53620ed2315ec202266038449d721 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2d414a87a831fc5a1f7f85fb9dff8a818c28e77bad76b248cdf808e3720cd8 +size 193010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..029f0e93c4340fe229b051c5b91f7e238d5cfaec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc852967a2ebbac25ed57529b2948f81f6bdaafb51c59e38ab4d9fd5647e796 +size 161606 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3b3a8ea1db87c2c39477faeba3296794d5ba968 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc8a1920bd9b425d09dc0c8ceaef87904da7deb1f527be2ec0537f4e05e596c +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b1cd5a4dc8f28797098bd2713741098a6f573546 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1526da0d028a11f93e160ca3eecf06a77040487a673d909c3fcf534944003fbf +size 218822 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f212692c48d27d9cc013628f024c51cd72ee6aa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30fe64b5ea2bac7beda5641507cb6b75675ec910a2bd9fd0ae91ed7df07520db +size 431104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..38d63d0af5bd104db901ccf5682c22f6e62c53f1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cca1dca931c22a1141ea848a43f53e1bdb1eb2b3ef2265a8286775fde065bac +size 189414 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a43264ba12bf9522562d46a0b75603b0f7e93e1c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6269f6296ba4e8aeacd3baaf25290b7fdc2a995cdb785b4b8216483a6dad0c +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b8bf7083ee8e5fe2222b281021a9ed0635470ab1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed1644dfca658675ba1161dd39310cf790869a2f7e45cabefea2c99f4875c31 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..923f63b2f5b7f20107631d517615974094181b1f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073bf71eb73762331f3ca510110f5f8ceb6bfbe1166f4daa3f2c165612223274 +size 161552 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f9b87e16c3d4715a99029fa2e3bfd73c682d605c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348e530a961c0aa85db2e222fea3388a5a28fb020a3d4cf617d71195d7226d00 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbc10999d6b768913b295fb7d50ff69b73b6139b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1987826c9843da28c0546864697f9250aec2e029886b4f9779f3ace1623ea1b3 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a37d4e7a40f2db8d09ee3c2b4ea74b426211839 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6eaabf37ae9ca1d81252c8e1b47bd23c6366310ed6320212bc61af2540e490 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e5647301ecdeb407a68b300004a1d5cc5916d977 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01eaa59c69b0d14228345344b7029f2fb23c75936267182fc78767beaf4eabfd +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a212bf590655fc59212e91375ac58d4ac3abf8a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160a54b2690d5bb2eb17da1565a42a86d3b770ae6db9cfd6a30f9031f4e63c2b +size 131781 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c75e70b560f3a8364c0cdb4df9b3b6ed94ca3e20 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e32c160596287903b26b5dbc6039064f3cd94156385b9306f2b3d39781bb867 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3173036d7027fb055ff3c3785cd8660e8b807265 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c256f0a3816ee65563b4989bd810188f6018639ca7204d5a69b1f774a42f755 +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c47a5f16e5afd26963785a9c0e50169d96f5f4d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95cd85b860ecb2d73cd3e2f3fe2efd949dbde68b26917ff0a565f470bff87b56 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fe22d2751a4bd033cde52b4206e8683ee30fd096 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e767e8a7e4f2a5a55166066c6643b52b773ec28bed262fd8a56988add3cfc2 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5be2400b66b8d7e2b992659e83df2930b2a9e9cf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8215827868d03b945307eb003b3b70a2705bbc6c53e01edcb9d301eb8ffade42 +size 115571 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa50505112770fec77fef7876472cfe40a011eb7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba5802db6482a705bcec4cd4e007ae29c5a52a3e5b72b0f8abfba6dfc9ee4cd +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..078e1d90e6d5e32ab799a5b56b799183edf695f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff3118938311de64dc3042f8cc8e41cdfcb50e6122d60325a64500b8f3a988e +size 182701 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2149c8b65934ee41538f0ff8bb1de51761e3f668 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9dcc2d291a29395a7d3b0ecd3c329ac16dec5a55d463e1547fdacbbde664e4 +size 10010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7dee00ba4c074cebc719e028b81cd5a2fb31505b Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7859f368f2058def82c3962b734c4d252223c7e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb5699f2623ab6ca092335dd914f1b12c6e22ec62337a8202d8dbdb79fd4078 +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89bd8462a40c976e769d73702f615ec0bbf6998a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49054c8c97e939c851e44dea55e571f5ba6a5060888a4ff8183b161b9eeb0220 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2115495a10b332ec3380df82ef4b56570029265 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abdab3e84c6531e5571396e01b58b3bfb35fb7dec9d21870a04410a3f7613ac +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a8543c3726035644a9abaa8f663d4ebf7d9aa85d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ff809696f5c1785e8968f71f7fd539fb4c543dbf2db678d3695e47008715b8 +size 114413 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5bc95a4d73b1e350690335cd902556c7fdb52c4c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9cda3ea9fa1b5d0e7731ed35cee573bbdc6a3feb1a6c961073d021375fe1744 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..98b91c77e3f923670f46571d8ef0c24b3cdcf650 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f80fc9d34da45c12058a4a2e19aeb07925dd0308836234b22745b6f8c75975 +size 112683 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7ca2c69a7dd748d7bf5c1b9a7744ae36f4f5b47e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc96305ae41b8647b53b4826fedc3f7f9d4cfd755e88955e88fb18ed777f830 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f6106f721a29954ac9072ffb0652369d4a5fde19 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90575d8ac097c7b3d12dd9134f57bf9eb110b7fd58639e5e54ffdb0b7cd12b06 +size 115493 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e436dddfefd0fe4f58cccfc47bc57e87a282b2fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72abeacb6f080a03ae68139dcfc5cfe2e1df870552ef49f7adbdbebb55d392b9 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..268320b2197d4fc0c6ab5cea988bb65ecc616bed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0255462fffc4f5ce8264ef23982169dd0ab283292314c50b8af5989884c0123 +size 182701 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff index 88e17170ac8d587c435015e8f3079aa4ca6eb541..2a70a7b4fb9359b170c8f4380fe5d8cb4490ea48 100644 Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1de18ce65c36f490eab957a2c042a0c7b65cca48 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e8dc33d0a2e7cca61a080629c476153085eb635298213f83b262295ede1528 +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db6327536c2b4126e0265d56b103db542fccbcc5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9d6c106ee9d7efd5ca7bdc125c0da4f20a885d28436589646a87e7069169f0 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0cf2ada79836d737763456cd7e17d8b7e8301a82 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd18dfae3e4b0dd6d82b4e16899dc55080b3a8037f39ceb6ff8c9997b6f0714 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f4b3c00814a76cd2347c920c2aef5534c09387d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f72b9c5a8d3e71a562f16bf240e58376cc17f16766824a77ced0e1fba440606 +size 161606 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ecb5b12fdbfd8c0a572647d52b245a509234ac36 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b829001da76c1e08c005dcf50d602be56661ccf2466909a95033c960d9baa22d +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad90a3e37251d3051e47f955932b35bdb1fae4d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9bdfa586e302eb86f47edba9519bc8c290973b14f4be2b802e0cfe37713c7dd +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d2b63d511dcb6ffb3bd4000599b64d01e113c217 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a783dc225009434cfa17af43a37207cf73886937 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a9dcc9793d10402fb1f07fb45e8939406207c09b3c77e7632abde04ea36135 +size 143137 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..744afa14c3770755ef1955097b71b08977f96f37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a0e784a103064a75a86db0ef3696a69ea0f819df5d54e3f195bedad2b762d5 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5d2b7be4ff749a841702e062b440ef9f203221e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1d4e9eb58a8fdd2b1387f257497c8beb4784d146394bed37a6f0602d4b8eb7 +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff22fe07da4399eff30c425405efbb886972f137 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be00605073f4db3fd213a8a27bbcde200b67212af9455929bd71e8176f725e1 +size 161552 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..717dd9347210c2f859bc66408081f107141393da --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce4dfc5bc60ba9f8a65d2f3835d57b8c63cd4913a003cb6d0d6ea54e87749a2 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..559521e7d7c13bfe1690db1c57744ecdf497301e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50098abdb580db6b2a8cb129410493a5750662f6e11632923aa4bda70f832650 +size 131781 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d3db94a76c74abe46fd9754004e0bbb2e1e9c2a5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0df4481d142b8c61eece507964330d7aff645d455ded810f45864879d04e37 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..23a19bfa109c80b40ef82002a5febb55f89fd404 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec031961421ea46578adbf00d706c9736984e44137dbff9d84c318d179bd4c7 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..185f876f1979dc969300e2dcdde76ffc26644e05 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce79df124530519425cfe3d7c854fdef43b6522ee595b461b2d604c30651736 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4a135f56ab06aa5f14fd9e1a37224bd73aeb16d7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa95ac4e73f11d5287729e3d53f8afd97caf8f99446ef779f0b94aae920d3af +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..164fab078082c968ad5e828cbb1302418447f959 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1503c4793f96619ae32f9b83c56940327c165b093bb9a4fd10ea68b26dff3944 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bca9a61167356d6d0a8338888d6d5d72591900c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:339c6006ae0a087a1369554e5369992fe0c980ff8337431ca4a29ac1517bc26b +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a69f8358f41901662c39eaadd5124838d69674a0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107d620aa0aac148e5e9c5be098a4c139e6d7d93f171c1c04d89fb134b5a01b6 +size 159842 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..421e5d6a5a0b45a119cba0fd8076ff7bbce0d831 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b90ccae9b021817027f4755168c6c338936817f905ffdfe826b946f35f3946d +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..262e7ff592cff312d593179ce8a4d23a3c82ee82 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac205fbceb2d5a93eb3eefd701958ce63287b0c5bc61140794d81fe51479e96 +size 217275 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0af8259fef7d6f8f086ccceabf8f4976fccbeec1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3290622eea12102a0f44968ebe5c66030280749b81c767c4ec1262f4d5674e0 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59773b0dbcca820258dfe53f95e6e76ef7a3b0ac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565ead2c15f2211ec08d6a350de1f40e787885e52a7c79d1670d5c931d2463a4 +size 955802 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8909ca101ee015f0014ebe241796a9e0b38da2b3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925d090722b386bb8112c2111bfaa41c1333e010419ca697f348ebf903f7e3e4 +size 2161664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c1f0bc08a22725f5ddc5271f88eaa3d51900a227 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8bd6d65f6730b8918d90fabda6238232734438b0d07f9dcb35c194bab20c49 +size 2231293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..50f1095e9b415fa7d46fcd5452892b51db57ea3a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5abdd863dcb22c3c496b816c5ba01b1e289d2de04c8eeebeaa6674b00e40697 +size 221211 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ad8e7c55401eb1b47203ff1cc2df107e718e481c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320fc43ef5373a0a794f47c5ada0b23ad969dae6668a20b4e0c4014956dda0e9 +size 328704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c6e35fc106189cacf185113fcdfe6132e4def4ae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0911fc7fc3f04d5e80f8a91b2b73e71447932cef89d8d51d7231261d6dcb87 +size 340002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..299200b7d9a3385562f226b4618481eb702e2893 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f628b39578f42bea77d256318c29313eccdcca9a95edf5a5257efff3fbf28a7 +size 189416 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aaa35c7c2640ff9aa349b59fc013eb39f104b288 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa09df687b0d3dabdde9692540909bb82330a76579f8a2674978f15f9145ff0b +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0e7b97e5ec7e2ab60f369092ec407f1d02c29790 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8941603b520a4c3f471835f80a2a93277104ec394d4280bc0464dd0e51abe154 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..848fb11ab811a54e801c0024b64e66a25df51859 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b19dcc7829c461191828a0ed051c9e2551841d4437d92455001c14039fcd1b6 +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5fa19c0e0198d5689518024144eac10bd6a853d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf90a43a2a22734500cdea09cbc0524a690790b79a19c5463f4063608d2773d +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff index 7796346494f0daa40b39f887fa7b25c8c133eab7..278716d1d23a8f2062aa063ac26b08c0fee8704f 100644 Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46928ad49f1efd6aff572d743431e282be7ba944 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36962f23ac765ea4fa33e2455b1bd08f06ff25ef05409e22acc400481427df99 +size 217277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9b877ccb1a53d3856f77831aff30f33941666b73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cd46d0cea0e8db30f972671826671caf8dfd40a4f4e6bc14887a59466e7c9a +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..42c585184b3733725f380fe79262b336af4fe278 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c889e039810569cfd70f898b6a7466f59cc17e5e8472f18c8e222208ee4b7549 +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80aeea005757697033ff909e95413d701b7c7c52 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916ce9dc21776c3c69d1b01cef5e1df9dee022933aa0675b316814e7425ef137 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..da5ef08c43ec154a43eab7b5b49c6eface17a03c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d87f9eeb300098238912e78ac29c165c06863590b81f2dae1c7e037ab0b2b2 +size 218822 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5a3d7521d5808640ae508b60273e511debfee2a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8221511d51f857152e2f8ac0ba3c90fbc830139676ecb5c4f861f5b4f473d3 +size 431104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..250d2041988ec2e52f08ab7370dbc018a1eaed63 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c6d448a38db62888ceb0e3958abd178e45b0ac7061ec77084b6b64cb3b156f +size 114413 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3d227b69d9042b935404ec9b80cce1a369b2752 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0192d52bbe1aaba917efb1a68f8b1a1243fdda4c21e22b934f805238561cfd1e +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3039bf766667382752c7f7d7684bc15c0a4a2c47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81fd7e46dd4d38c0a50b96cae97b2e1eb58f0e38c534c4f80fc01312399ed5b +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c134d15f09585c9e3d0b68a03d0bc9a60b319ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e002cd0ee4973687f5a973f793fac9fe0d79f9e71c6176d83e632c2617ef0c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..85b18cad1a80362525fa28b12da26da34c090464 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3804a491c378a04b7503220ca7f721f587e1bf3519572d89df8c1db705277eea +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9fbcdf4739189fb44afab53abf2ef9243c8a5f97 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a29a7c929b4d87fcaf139b2c467e4121e16283d4b433340381670e22c128bb +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7ac17f6035702ed025e259a4ab0b6a66e7b9dac1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6adc0afbde64c61bc0c59d708742998d0ed8cacedd763d5b897f26939ef4e46 +size 7004 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1eaeafdd79178ba957e46e0d4193730e424384df Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..79680e62660f46e42fc77a73218dd58cb60dd025 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f70256b49624663f124791de699598fc947b0f80b8b27c73973aa67e5676ac2 +size 217275 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4137114c227239b457f7f0a7fbf323e63ae7e6c4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62275216ccf90cd1c50db6e75c6050c8bbb9de33ccce90de8066046261251431 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab52d0992487033ed4cd137bf1983b1412e9e693 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f189519dd4f7b1b1d0de9f1389bb8ece67b3d757fd26381c835121188439ed +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3971444c1294100bcaa3837bc7f9f5d2ecf31a1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6439aef2099c115415211e36e33d0d6788e0b4aa7d2e1f76515a36cd03ca9431 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c0451d469c1dc05e7bd1059018ecba6bb6940c27 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9946ceeb5aea33b029c3424322f5e293fb879db261fabaa2b9c8dfaf78fb445c +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..653d27cfc0e0b84ad5c71b7c47329aec4868a5a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a84d231abb65453464149c97003c0af604eba87418c8ed68da627ce0415ef3 +size 189414 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e76440971f440801e89490095754eb7915242b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8021eb3e7858b2b427370867456aea7a96ead2ea666c9894dbc690e808d04721 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1c05ec6cc0f07b6a1c9a20ab8bb36ea2db289d98 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049e61fefb7dee9f1f38dd3d0ea2aebe90ae4b942bfafed82954d615b2c06030 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3c6a516b771d471457e3f6a1699f88b4085fd56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786287af28926930e2a0d805ad9723239719241353325cb92367adb8d755ee72 +size 221211 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a350394d3f05e986ad33d2a96317d09bec02c0d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc0c2db2c1b9701dc89f05dc4a403441c5e812c22cfbc8e275c94078a43d056 +size 328704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0c3b7758fcddc046e9dc40be2163a6c498c5ac2a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70c5fb75426a49d0b2ecc1cc51df0b277a4ff76bfb86572e349a2816fb339bf +size 340002