diff --git a/.gitattributes b/.gitattributes index 0181677b8e866fdec42b768bcb2c645bbce9e23c..a4a724604b866a61b3be56bb00adc5c6d91abf55 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3992,3 +3992,19 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_90537ca32d559b5b82ed+a9d440f5/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_90537ca32d559b5b82ed+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_b2f70bdbe8eeeceb6e97+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_b652d0e4d243f31a7a28+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/5eef7da50a217db5b302.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/5eef7da50a217db5b302.json new file mode 100644 index 0000000000000000000000000000000000000000..4655d16a5a8109afc7b2918953bde955e7966a0b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/5eef7da50a217db5b302.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f381e9a0d80f3d23d0b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f381e9a0d80f3d23d0b.json new file mode 100644 index 0000000000000000000000000000000000000000..9065a0cee3cc7367485b0561f080267f728b21b6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f381e9a0d80f3d23d0b.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/452ec18c8952f9cf8540.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/452ec18c8952f9cf8540.json new file mode 100644 index 0000000000000000000000000000000000000000..759f899e661ad064aee4708d851d0dfe3c12a4a8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/452ec18c8952f9cf8540.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/765088ddf9ced28aa9a1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/765088ddf9ced28aa9a1.json new file mode 100644 index 0000000000000000000000000000000000000000..37469a57d6ea40e1593c3308dadc70f79b3784c2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/765088ddf9ced28aa9a1.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c1655182b84da2a4c42e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c1655182b84da2a4c42e.json new file mode 100644 index 0000000000000000000000000000000000000000..1ae7d4d8337d930b9ad1271e2d3c9f2d9053ae8d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c1655182b84da2a4c42e.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 16384, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 16384, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..041ebea24d910efc46a98e0ae9b9f62ada242cf4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8f5d32b8321f6989bc9d43d673308f019fb067e57bbd2f5c8527a85e87e4b6 +size 620149 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c1d68c4b270bd4859ae492e803ef6ce5feed5cf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:facf98bc7d48c576ca0fdc114ae50d716ac5f4073dbbbab77f15058e1172aa51 +size 1106944 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0f7adfb6f83059fbca20bed4b443cb1b81786d6e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_198b5b02b387709019ea+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b103a337ecc4130bd48980789b5e20d873a619e12a86fc5e8663891fa97253f +size 1251970 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c9fd68de9e6eeaac648823152c0cc9d9e27c9d7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84620525e12229c74c885c0ce3e0225158e4074a231ef29a6cb1427dd5834639 +size 155181 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..75c87a0f92536dca36191a5d34c0d48a1239d330 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_389bcc74371d275ea994+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717c56ebc06cff5d0cdd82a0ce1268c626baafdb8cb2f7f4f569257db24d5d6d +size 3267584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb857b806a335e57123d4cb984216d98b1dd4958 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71939c494e52b35bec2295aff99bd7f69bc8ef24a7f82332fe817463fd82d580 +size 1036280 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6f4d5126ff286da3223d926cbdd16f131f0cac9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d8b3bbf6ba518b8162f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f538962d0b2643238cbf7c34852046be3bba640e7007ac75555267e10dea7471 +size 10681344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0be10a77b5d5e666bdd1fd950278f4bf138c621 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061f2ed785d97393ab0d7a173f5cff55108edd720bea0016ab95ff8dc27b964e +size 864561 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7097d9191794d920ca58dd0a99e5299b1dd4efd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c851212dbcfd36482be8f6b99e912b32619c33a416cf3dabfacc2dae36d76eb +size 8233984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3d962306f24750faaaace8d7b044c2594588d45e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7076b883d5c8650d94da+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba3391169072ac8478f0519b0c2a159af020c222f1a93fe3bb8bd64216281bd +size 8391033 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b78778fd0e36a61a664a7f894f63ea86efe0e999 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476aad74c9f0f0a753fcfd10d1a52ba8fc70d203b6656393faf43779213abbe3 +size 1044964 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1c7dd9651a3919262ce7ae8e4229898c8a70cd50 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_837bdb7b4c4f834e67af+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313bb2fac82d154054c4188f06dec6675923e7b3e99c6e330a0a6f03d5e678e8 +size 75060224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..11580bec58deac817218cbadc5ed5f34d810f20a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f535bf4362204976aae951872527015d027b44f19301788efbc4790f69df145d +size 621840 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26b74a5d734e29e46bbe0f7515b4a08de87843cd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c99c1c48449542c4e92bea86fa319e46171a5b9f2e93e5c581f7be6689ce4a +size 1895424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..26e2a74b1a3bc11ee0284b3da44dbd4f2b3905da --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_83e84045fc71578eb073+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99dc771226f91f120ddb25b037f66df0b59a9d1506cb017150067588e75dae3 +size 2040790 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fa99414006d6bb7918e760f17c9094ab4df3d3e9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34d202063a9984d602feaaf2c700afcc07ee5b1a307c2095762c14881240f14b +size 710874 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7106e34b07a75080ad2081557689f3f1e91ecf16 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9ae7a29deecc8ce3b7e+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88110d333f8f10177d95d2263884a0a33e2743edbe25cfce395001990eb2db2c +size 54508544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..89ed159f0992d75499e46e2b622789dbcbbbdd6c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee545e981e023f9ea361f2f60a26fa52fa682607225bf3b6eb0ea11fb36a86f +size 862478 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..78ff46d7bb409860438e4aa28c9069e4c0f7d3aa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f46da234144703e575f63dbcc2d7c49b49b21c1a380b1677f907f501f3fd4f +size 6913024 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f14d7f420d19f64c4d677f329f787d78dd96838f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa860ba18e95034cf093+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876ed0fa40920c6776e14c4b6ef4b23753c635e562d81003a9284c26a28b7ef1 +size 7069565 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..da869358d2b65638774f311b76a4481cc494b1e5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a27ffe177d303d6fff091b8112fd026019009013b54d75aa67c97b2ec4c5c83 +size 861528 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a0832359db6c5c84abb380a1405d04c1970a255c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04daa061d8443bc13eca4a4a385fde4b599cbcc33b699296fd704d0173df0491 +size 4015104 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d2d9e4d97fe9a48c2f15633f38859adc78b0329c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2039c6e7168c45a13b847f5ad181344cb2ee870be409a40bf4db922bbdb9cf62 +size 4171645 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6576da20c2c72c9e8aab2406bd1af7cfb838250 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c6beff2011fa69bed4b6788c3e787d5099c70fa4cddc7b20d16c92507361a3d +size 957736 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1cdc7596ee3746e2b87b7159bb44090fd1d97ff2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731fc816c3aaf969fb002576c2d1e47134df4fd38ea4d78567343fa65916d59a +size 10630144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5667d6dd7011b617d655d45d8808e2d7784f4e32 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0bdb4d686f27baa54f6d410dbbfeef63fd90329e44bfc5e1e8027ffa394dfd7 +size 567258 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c3cc78dbb0d69621745350782a31518cf9eb7bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394132598a21a72295177dfdc3828952fab9d6d2b103429abcdbd37ce263bd80 +size 2735104