diff --git a/.gitattributes b/.gitattributes index a4a724604b866a61b3be56bb00adc5c6d91abf55..635f527b2369465eb713819f72f60aa5529471d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4008,3 +4008,23 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json new file mode 100644 index 0000000000000000000000000000000000000000..20825fbbc2cf526077ec61fe71a335fde99211b6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gpt-oss", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "tengomucho/tiny-random-gpt-oss", + "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 18, + "num_experts_per_tok": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 150000.0, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json new file mode 100644 index 0000000000000000000000000000000000000000..f30a13a7ff918bbc872d61d654c643e992eb28bf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gpt-oss", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "tengomucho/tiny-random-gpt-oss", + "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 18, + "num_experts_per_tok": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 150000.0, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json new file mode 100644 index 0000000000000000000000000000000000000000..498edd69b410a646bc484aa5c213e09748fc25ed --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gpt-oss", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "tengomucho/tiny-random-gpt-oss", + "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 18, + "num_experts_per_tok": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 150000.0, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46936af0d2579a062b463c4bea12d1b475d82ecf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab9a32677a94a651ba145fbae9fe9f1ce8a545105e8012934444f8d846730cce +size 238434 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9ab83e128bdf731bb21e6d6c51f3805168026d79 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d20cba0ee881669a3e5c30bf11aadc2278ee7c0b66b0eca0c2a662c4883cfc +size 533504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..abb2209d21c2b103b85e3b78ad4508a410192f2d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835b74ad54c0544e394e68d445455f7a11a561917054309d0fb7951aad890107 +size 87986 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..04cc2764a445c501523a544f53baa7685d9cb7ce --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9b7191800edac2a1ff9fcc7b0c4625a5a0c10255db060bb85d21a6ca7c3019 +size 369664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5b45feca0448145ea6ad3a3e22b968dd36a8519a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a601af92eb5bf2b3dcb027f5f6ac831565988baf7b0b579f970a33108966a122 +size 385657 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..353754ad3200f47b52e4a860986421406eeebe1f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb7ee995efc83075286c18ae0bd6aa668df249e6541fb44f7e8e467499146ba +size 106146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bf60ef35cb5490e550279e05bcad68e43d290cd6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94f5009fea941ff353050a32c72276e9611eb5c5985f03c7616eed0b33eb599 +size 410624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..872a052f21b4a87ee5370a92af05f10b9e94d284 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbf230dd0eb742b4e0753cb960f76abebe45d1364bcd947eef076af3ce83290 +size 426940 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2606bf0c67959b94a34a0f323316eef9805b4a18 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45058e55d159e1dddc929e5556e384548310357a373fff6c2bf022f58df66e8a +size 16598 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..debebcdac29424005d991344b7a79d4222b5683a Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c769975957511e5e82874104e5a6e36bc397ea24 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12cb605b076ecbbaf380bd5f6c104ee598d4fd16e51573ece04daba033ff668 +size 87988 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ccc8b7609316145796bd0657ba4f0f92d375ee71 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d6a56e03a8fa0612b686f0709604b9d6aef458bb0acf0c68554d2604886fbb +size 369664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ca18ded83eac26cfc090d9fc7b5c94cc6d6c17de --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:613cf200ecbd7dbd999d81e6aef20164355a7ac95854fdcfca88d9258b7ca27a +size 385657 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98a2726d5a78d4fd057693b2912ff86ad9524d95 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1fff59be3626706ff573da8beefbc210c912f375d56de5d3090eb3d87b7ddc +size 369664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..63af9819df8c83a21127890c65b61a6eda6c38eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f8318dd6061234a500d6b0717ebd6695551a71e51ac3816a4686cce3507c4f +size 385657 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff index 2626da2e0647985d6a4218705e2c126980c0b3a4..788508e6a883e2089b46e9a55bb4e952f411722c 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6d47b92669be6842cd9c1da3d600b7d21b63bc2e2910874ea1a32101a79f18b +oid sha256:9e890a716146d9a59af4da09e25f3d578e4354e38287ece0c077c2d7ac1621a9 size 748544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo index a67b16075929b39ac1e88326f2562058de76db78..d2f30b02da784f4247c4b8c76c0ba7e6ab2f25bf 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b6cc78f946f46795f8565376f97193c2ad1edbcd3685ec8cd98292cb036038a +oid sha256:64f5ba65cf7bb975a1835dd3098775dde405e5ab85c3e0bdf894b5f01cfb06df size 772992 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41878560c8196b2ed020e5956439abd1487ec9d3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf09e474390d8972978858d6b4d6e1e40777439b6e9201c08cff2453fbe210b +size 106150 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3f8db9397e26d32b2a30e9a0e7e9e9f3310eb7e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb3675ee770f420c945a0564f3d3fadac9e748a323a64cdd065c5b12ceb1d26 +size 410624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0a61cbf86e9c6ab891af6603c032fd4c1f3c2cc8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf2454e4949622f1a33bbeafba975a592e566c6eb633bca25947ff4dafea710e +size 426940 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6623d28b6f1e4525109837919af49667074bf648 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b78950a0e796b44eff2c591c16a280522b577d183b3301fde8aeeb6552d2b6 +size 1844268 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..75838ec52743da90294810b06f6c2f4c28e10bab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5f8396058c368617651d6a4392d0215b5ed6033f220cfd9f01c5c9b1799f28 +size 1475584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bd9c1726ba5fd33910a89e2cc18250eb9020fab8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1fe65aedfce2d2d9807ea789de5abf6c60bf1c6371d83aee40cd09028963c9 +size 16917 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4369cfefc8859818cefe9062ea7ad088817b038e Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..404083d5fcd9b111dda2f48cf492a8baef25b09a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380a1631f162813ccfe1c8ed3b96e083e269fc0c9dfb0cbb41a55469d5a02eb5 +size 1844268 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e9aab7f4287cbc04cf5c6b5b8b9329007d4da4b6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb index 7e0f805e3664c1ac1cf712409e971e98fe71dfca..41c52f2411c17138f4f368722cc20704db7a4847 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87fdd0a28b51aede94ee64146624c6d123505cb5c5f67d5279671e9441277fec +oid sha256:c17d50397e7f14e98ec7a192d28f00431bee79435138647151a044684d4e3da8 size 40790698 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..40648f45e3e31d5ab3b1bab51daeace99bee6c3c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8814b75b418016d7c1631d8bf0efcb69d0b9a75e7ba9926ad4a8bb54d34fb33 +size 238432 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..483f935f9508330560646c0644c776f9d457e21b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8059f3be3b3c5c37df1526d7108731e622158f8c70c8f2a8f1e91f80be8518f9 +size 533504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..824a874ec412505f27082f6936790ef782e29ac1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32928bc7444cd0952f8f265b6dd3844ffc0d27344aa286ddaf4a94ba9a8e48b4 +size 238438 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..634d163553cd4b3aef6930f5b709a3a8e22becde --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7842f17ef3b5bd6c4a5e6f6e39c6af2e96c47b60c735f11c6f26ac5004440c +size 533504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c7ef52b37976fffc92d79f12607cb3e3fa509d93 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aedbff4b0049a132f37d37c72d3a6150491a72620679b6ca276d62cb20751ce +size 87990 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c0d44c48ff095eacd3112a83c40c94f9dee1cd4e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac39d16fa6588b4f6f45646578b28b43d3eb27ab8717dfca5bfc1bd2b2643a1 +size 369664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6f58d068ab04c604a2c41387d8d846ebb42a1270 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b499a608abb9bc616efe249ffa7f6050713fe8b2f149cd247916247876f239c +size 385657 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..369398153a013e4548f37b4dd75165a5159fe9b0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d134125eccac6ec49822bd88650c76dd16cf165791f57e17205ace1c950cfce +size 238436 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96901b19d270fd1f6d9f574aa69ad1d73c25064c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecf4b07ae924eff0a3f3afda24a4bc565b130bb6c0ffd81e8b8eb9ce7d4f666 +size 533504 diff --git a/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7b4a386d89b77f8f727b.json b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7b4a386d89b77f8f727b.json new file mode 100644 index 0000000000000000000000000000000000000000..d76dbd02629442c74858add76bee090db7b6d660 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7b4a386d89b77f8f727b.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gpt-oss", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "tengomucho/tiny-random-gpt-oss", + "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.20.9961.0+0acef03a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 18, + "num_experts_per_tok": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 150000.0, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7e626cf8640c41033045b4f585c51662e1b2b08a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73f9d43e3650be96da0fa1ba22f3e821d5602b652bf42f23b6d3ad929d1d3ac +size 1347 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83cd17003f4b807002854ec3f46a558bcecc015e Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_13170971227492778238+e30acd3a/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..da1d39eadc732fe4307a0d7045d67eeef36b2d0a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7612f1616870c8814647ebda246aedd11ac0e8348be3f2f20f2c8f5d493a05e8 +size 19445143 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1e1d144e6890b67f5201f0bb5f262b1fa70b263a --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06eb3691e0504e1ff78ee8938713878d6d7c00625b1e02fa0fe5b7c340a4d98 +size 7394304 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..12f379aac4894bc07c5fcc1eace7d13e32367892 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_571edbb6-1fee-4093-8aae-62512fe79629/compiler_workdir/CustomRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a44c75f7bc484cd172d57ed8af67e8b2924350e2 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a2fe65ab4e5f3ceed81f33bd32efc2c339fd942064e3d586f7c14ddf862d3a +size 1199 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7f9b2abecc7ab544d981be1519f3bc24c1e5b2f1 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7e13f42e174286664dca86b4c08b10ed87589b41 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_aabcc744dcf70c4d6d79+a4f3cd01/wrapped_neff.hlo differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe001e38eda1c9b638d1480dffc5996c63772699 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eab57e4cc40c62ef5dd3f827472fc903e21efbf045d8f789d33a6970a85874a +size 87904 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..adb9df4522962e3718ce44113df15497cae17d14 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c621a1a8f590d66e8b807ba1d04679a68230448faa72fcc4b7254d6dc1f0539 +size 472064 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6aabcdd30b87066784b10d5e28efb1b9afb26967 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b04a064e90ba5969d6e408582cded478b038de86df72543e7094bf19aaa469 +size 488089 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..899a62dae2242e668e6fddfdfb752188f637af47 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd1b281e4611ff03fce37e36652c3aeee6b3254431ee7bcf4d1f4fa67144ee7 +size 16598 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec4edbfca5d048cd6f35843686792df3eb47927c Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_d45744407aff6f28ad1c+431f5505/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/compile_flags.json b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ccbb17723895548b611dcf9155e476bf4341df --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_abcdbed7-9f4f-4d98-8444-741cedb34811/compiler_workdir/CustomRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.done b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.hlo_module.pb b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a44c75f7bc484cd172d57ed8af67e8b2924350e2 --- /dev/null +++ b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a2fe65ab4e5f3ceed81f33bd32efc2c339fd942064e3d586f7c14ddf862d3a +size 1199 diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.neff b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa4b355156a9eb48a32129036f84a74d714a11c3 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/model.neff differ diff --git a/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/wrapped_neff.hlo b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4f19c9858d4c02cb59cc311dcb6549bf5b14680 Binary files /dev/null and b/neuronxcc-2.20.9961.0+0acef03a/MODULE_e6234fd593d565924bae+d233ce80/wrapped_neff.hlo differ