diff --git a/.gitattributes b/.gitattributes index 2f62b88ff6ad5d9fb88b5311a6489dbb40672ad7..311fa051ad70901b6fb0932ceb42e999b45cba9c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3804,3 +3804,19 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_91501952fa085a11a635+ed72d204/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_efca9e0e049746cedc78+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_efca9e0e049746cedc78+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_f5968e810b1a450b5c85+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/meta-llama/Llama-3.2-1B/71d2d9a950004b288ebe.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/meta-llama/Llama-3.2-1B/71d2d9a950004b288ebe.json new file mode 100644 index 0000000000000000000000000000000000000000..8f3f8de0bd9b9bd06e1a835c1386ae1d3bfb8a84 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/llama/meta-llama/Llama-3.2-1B/71d2d9a950004b288ebe.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B", + "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3292185f0123466a93a2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3292185f0123466a93a2.json new file mode 100644 index 0000000000000000000000000000000000000000..429d5ec508676d91c576c5b8fb8767b243fd8b66 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3292185f0123466a93a2.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b60dca7d7a9c53810fe0.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b60dca7d7a9c53810fe0.json new file mode 100644 index 0000000000000000000000000000000000000000..a1b2246daf28fca8906f4e4bf9c1e2c4f3e1299c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b60dca7d7a9c53810fe0.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b9b0c081795fa9e33ca6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b9b0c081795fa9e33ca6.json new file mode 100644 index 0000000000000000000000000000000000000000..51b142065e587f228154151b2fac7275701acf69 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/b9b0c081795fa9e33ca6.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/5967f2b3d54a7c5a3168.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/5967f2b3d54a7c5a3168.json new file mode 100644 index 0000000000000000000000000000000000000000..0f30e9b68ea44850a115c39b400b667de90d3cf7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/5967f2b3d54a7c5a3168.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/c8a379bab933fc9c554c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/c8a379bab933fc9c554c.json new file mode 100644 index 0000000000000000000000000000000000000000..9eeea8fbb1c20a3f3ce15554ff8b7d66e660809d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/c8a379bab933fc9c554c.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d76398f17f64a0ddfb0a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d76398f17f64a0ddfb0a.json new file mode 100644 index 0000000000000000000000000000000000000000..53e28b57122493bb9d2dc776137e2d678467a449 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d76398f17f64a0ddfb0a.json @@ -0,0 +1,149 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "988f1dde6bd2a36e3e7f45777a36507ea004fe22", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60271363e0e147450b0f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60271363e0e147450b0f.json new file mode 100644 index 0000000000000000000000000000000000000000..9faa799d0f06affc7c8a8b59e3e2a3a97e3f4522 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60271363e0e147450b0f.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a9e5f04b708cd397fbdd.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a9e5f04b708cd397fbdd.json new file mode 100644 index 0000000000000000000000000000000000000000..fb37f358ad0de7bb73a83394b34366ef305efa5c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a9e5f04b708cd397fbdd.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de79a3968941a6d3864f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de79a3968941a6d3864f.json new file mode 100644 index 0000000000000000000000000000000000000000..d9da159991bae0fc0c150563bd1b6afd84c4fa08 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de79a3968941a6d3864f.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/28b90633a77c30e8a187.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/28b90633a77c30e8a187.json new file mode 100644 index 0000000000000000000000000000000000000000..c0b435391b9da9188af7cecc8eb243b2759a89e3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/28b90633a77c30e8a187.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/88a97541d018d7aa0b29.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/88a97541d018d7aa0b29.json new file mode 100644 index 0000000000000000000000000000000000000000..e1a21c04aa8a7a4a7d109f9f32297b8e8b6435aa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/88a97541d018d7aa0b29.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/d8f338a72f4bb0a4682d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/d8f338a72f4bb0a4682d.json new file mode 100644 index 0000000000000000000000000000000000000000..7a957888e6f1fcedb0c8d2f9da31b133db131d70 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/llamafactory/tiny-random-Llama-3/d8f338a72f4bb0a4682d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/unsloth/Llama-3.2-1B-Instruct/f7ed18f9ae29fab731ad.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/unsloth/Llama-3.2-1B-Instruct/f7ed18f9ae29fab731ad.json new file mode 100644 index 0000000000000000000000000000000000000000..682b2ce1a88a91843e3c33ed56fb7bf36a191db3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/llama/unsloth/Llama-3.2-1B-Instruct/f7ed18f9ae29fab731ad.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/0d648a236d7cc578c5a8.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/0d648a236d7cc578c5a8.json new file mode 100644 index 0000000000000000000000000000000000000000..65ebc6bfd7e8ab4341d3dc1b0cfe0884e1066847 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/0d648a236d7cc578c5a8.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1228009b3909ec8d9751.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1228009b3909ec8d9751.json new file mode 100644 index 0000000000000000000000000000000000000000..f4b8cee3ae10f86921fe342677e84d579183d0d8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1228009b3909ec8d9751.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1c5104d89e025d3fb8db.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1c5104d89e025d3fb8db.json new file mode 100644 index 0000000000000000000000000000000000000000..a8f5ede5e006e63bf744754b7345bf04ebe6ebc6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/phi3/yujiepan/phi-4-tiny-random/1c5104d89e025d3fb8db.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/4f65451820dd0eadcdca.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/4f65451820dd0eadcdca.json new file mode 100644 index 0000000000000000000000000000000000000000..58b223cf9e5048fe0e5ee5e565b076f11e858053 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/4f65451820dd0eadcdca.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d7566c1b656e75712c0c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d7566c1b656e75712c0c.json new file mode 100644 index 0000000000000000000000000000000000000000..7d62c2f664f0a7e617c49182f4d11bd1745eba89 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d7566c1b656e75712c0c.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/f47233e40681eaec5807.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/f47233e40681eaec5807.json new file mode 100644 index 0000000000000000000000000000000000000000..ae35571acd8356d513eb3aa317eda223e67f371e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/f47233e40681eaec5807.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..91769abc911bad7cd76e6ed32457ed462cd20092 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26733925702af59a7abc8f84c20ccf865775eabb94345ce202abfad875a9ad01 +size 888165 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3461d58429673e184aea660c36c559f99104830e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_018530e1260c6c7cee60+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4621fb761c7e4fe70f3db98fcf513d48a57cd3df25ce6cabf49c41e0cd4408f4 +size 14828544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1d49f554c62cb90d41d6e8d6696be09b16268bc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72287a987834149a2335abf62583736027e879d554dc31cde709dfa136503eee +size 803944 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa4e24c60f1d3aaa4695480762f78c196b2fe408 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c028818a0dff638ebbd1d62fc8de93d62b5dce39299d501c1c94bf17fec57c6f +size 3830784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4677b8fa09a46307128dc6f12db254fead28c9dc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16dadb9b6ab4bb044c1e+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b8f7aa6c0e42823cb6937ca044c0857914f7915bc5da99db9191908b277872 +size 4025809 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..63069515edfd09e1c1023da21119914a3644382c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d8642293405ae45c807bfc2078bf22602f8f53a3824b448e321d6b17fa96a5a +size 198203 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bcb7aaeaa487fc2412f1cc479e728b334331d796 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d2b7dee00e90e52700e+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc91d3344e6a7acf2ce96d573914b28841011f1bcef7ea879355a6866286c00 +size 3185664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0130dbaea6feaca2c0854661a2f78d27c8c844af --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac477097f210e0dfb28201a42aa4e76415d59b1d5dbb0eb4e4cbafed02228523 +size 154634 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be598391f0bde19f95e36f4f9b2ef14730fcd519 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_41d952393bdf990574ea+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad7307699d38332a358b4ff0dcc8b21d650f98d20c5bed693d6b8eb29a80916 +size 3144704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11fdecb3f702a93bddaf2406129125fcb2b022a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb5d459df79786becdd9fdd9f89faf2d01dd48e1dc407a6e2bb6561214e499c +size 154641 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c90d2498f2424f7e1eb0c3c0c9f19bb7d06d130e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42f2f26b5d67b722528c+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce293a6c1b9514f868bd95699df03c472d7ff9126a2fd602268ea42e6b7f5974 +size 3144704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce8ba1a9044a80d23732d6b28709da38491ccc70 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640be59bb8ab96ddc9429042250579daa625b1975213591f9366e42938417e17 +size 795264 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..46c5821d47dddf21dd063807eb53d40bcd07e86a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9461b0a955924e73f9d02e72a6b2d8e921a6c8613b5835b277000810e13daf2c +size 3820544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..62da180981f287f99c1d073e585893c523d0371a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a4f005f17fc9cd3a4ea+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46acde2ffd2a6732876b9de805b2590005aeeb01fb31144d006024a8272d5529 +size 3976689 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db9122ca28e07000ad51c06d7cec49d20f4eb17d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9760d3d54d42bb78c35e72d673363e80136997195c2f33b331cef3264018a84 +size 887761 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b00bd149472823d849bdd6c41a1d26aedd65c8e0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_865d84616d5b312d9a15+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c0da7347755b7b92286597d83369e32edf1b27ed2beef696fb9ffd357134cb +size 14828544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f406ee451adef40f9261f92b4fbdec9688562a9e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ccf5e2ab366863472ce41ccf75894f9332a14bd414c11653c9ebf9cc13769f7 +size 849328 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7cb869b498c21defabaf20ff938d77ed31efb49a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_93f2290cdf5486e6695f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145025aa9f9858c7f0c7c0617f290792fded1126f8e2e48345d873a97d5de215 +size 14705664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1e4ee47031ef971ad3198f5d7f1fa57da0eb239 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102a2e14e1317e0a7e45f7c54c1f6ed1c7beb8022975a6422ad8e285bd50f88d +size 907942 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8673d9a9be2316e4a317db7711a12140ace8b2a0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a58dd9102c0f9872318f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dd2a81db7c850b78423337f3b81ba944b95c7e6c66032e7a3a8f3cb433c1234 +size 10527744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a90777c3e0514f7beab29114a708e4264d279b14 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:647663fd9e50eff2278a488b3d295d0ff812a6f50cd409ef6b84a8f729d80775 +size 794868 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6c578e04fa1c9f56e79afdc4a2ca523930ba8747 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cfc9d7f13c9377271fba7d76c9ae7895c78acd0c206b7cc9a58b469ce0fe456 +size 3820544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3bb08f0357c352e734e33e45841937e1116f4728 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c42e5a3d424f33bed45c+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4f15370165f64065e2f2dd9964cfc9a289337baf7ef27a9c55e4581a3877bc +size 3976689 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..50482b5bed60dfcf318de8910cf77b873d2539c5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5645ca3411a44b2ad8de7d11a3e75e0c745779d33ecce407aa78af51d88dde +size 757115 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b72bb393a35e8305b82cc5b0f6bf45dcd8c641bb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4efac504fb89b771345847f372c5e1e5655f19ea22c4225b7f13f2ee628b87e5 +size 3646464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..910536b1183a5ae794d435ce4c3b00d2b9140d0f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e2af0f8dc3b0905ea1be+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4faf53d057f973a3cd71ae17d7ab2cb13837ce9ae1d3fbed809085e7462ccb2 +size 3802494 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..19778b02a8ac138f5091d3cb8b90f072f70f608a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd0d5a64c68b39eb03f58bba1b176f1aeac89ea516ae1b862b81e7522dd9c6b +size 154641 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a4537c7985d4263bd9928d1d9c19ce49fcdbc9d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fb00ac5d5c23637419fa+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551720e9f420bd49a48f1505ac27dd2c477c5585c85ba59fb10e6410aa39d25c +size 3144704