Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +42 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json +63 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json +63 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json +64 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json +60 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json +60 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json +83 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json +65 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json +65 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json +66 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json +66 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log +116 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json +1 -0
.gitattributes
CHANGED
@@ -4516,3 +4516,45 @@ neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/model.neff f
|
|
4516 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4517 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
|
4518 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4516 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4517 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
|
4518 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4519 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4520 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
|
4521 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4522 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
|
4523 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
|
4524 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4525 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4526 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4527 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4528 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4529 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
|
4530 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4531 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4532 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4533 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4534 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4535 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4536 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
|
4537 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4538 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4539 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4540 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4541 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4542 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4543 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4544 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4545 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4546 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
|
4547 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4548 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4549 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4550 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4551 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4552 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4553 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4554 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
|
4555 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4556 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
|
4557 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4558 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4559 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4560 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GraniteForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"attention_multiplier": 1.0,
|
11 |
+
"embedding_multiplier": 1.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 32,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 64,
|
16 |
+
"logits_scaling": 1.0,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"mlp_bias": false,
|
19 |
+
"model_type": "granite",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "NxDNeuronConfig",
|
22 |
+
"batch_size": 2,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
25 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
26 |
+
"continuous_batching": true,
|
27 |
+
"enable_bucketing": false,
|
28 |
+
"ep_degree": 1,
|
29 |
+
"fused_qkv": true,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"local_ranks_size": 2,
|
32 |
+
"logical_nc_config": 1,
|
33 |
+
"max_batch_size": 2,
|
34 |
+
"max_context_length": 1024,
|
35 |
+
"max_topk": 256,
|
36 |
+
"n_active_tokens": 1024,
|
37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
38 |
+
"on_device_sampling": true,
|
39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
40 |
+
"output_logits": false,
|
41 |
+
"pp_degree": 1,
|
42 |
+
"sequence_length": 1024,
|
43 |
+
"speculation_length": 0,
|
44 |
+
"start_rank_id": 0,
|
45 |
+
"target": null,
|
46 |
+
"torch_dtype": "float16",
|
47 |
+
"tp_degree": 2
|
48 |
+
},
|
49 |
+
"num_attention_heads": 4,
|
50 |
+
"num_hidden_layers": 2,
|
51 |
+
"num_key_value_heads": 4,
|
52 |
+
"residual_multiplier": 1.0,
|
53 |
+
"rms_norm_eps": 1e-06,
|
54 |
+
"rope_scaling": null,
|
55 |
+
"rope_theta": 10000.0,
|
56 |
+
"tie_word_embeddings": false,
|
57 |
+
"use_cache": true,
|
58 |
+
"vocab_size": 49152
|
59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GraniteForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"attention_multiplier": 1.0,
|
11 |
+
"embedding_multiplier": 1.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 32,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 64,
|
16 |
+
"logits_scaling": 1.0,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"mlp_bias": false,
|
19 |
+
"model_type": "granite",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "NxDNeuronConfig",
|
22 |
+
"batch_size": 1,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
25 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
26 |
+
"continuous_batching": false,
|
27 |
+
"enable_bucketing": false,
|
28 |
+
"ep_degree": 1,
|
29 |
+
"fused_qkv": true,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"local_ranks_size": 2,
|
32 |
+
"logical_nc_config": 1,
|
33 |
+
"max_batch_size": 1,
|
34 |
+
"max_context_length": 1024,
|
35 |
+
"max_topk": 256,
|
36 |
+
"n_active_tokens": 1024,
|
37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
38 |
+
"on_device_sampling": true,
|
39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
40 |
+
"output_logits": false,
|
41 |
+
"pp_degree": 1,
|
42 |
+
"sequence_length": 1024,
|
43 |
+
"speculation_length": 0,
|
44 |
+
"start_rank_id": 0,
|
45 |
+
"target": null,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"tp_degree": 2
|
48 |
+
},
|
49 |
+
"num_attention_heads": 4,
|
50 |
+
"num_hidden_layers": 2,
|
51 |
+
"num_key_value_heads": 4,
|
52 |
+
"residual_multiplier": 1.0,
|
53 |
+
"rms_norm_eps": 1e-06,
|
54 |
+
"rope_scaling": null,
|
55 |
+
"rope_theta": 10000.0,
|
56 |
+
"tie_word_embeddings": false,
|
57 |
+
"use_cache": true,
|
58 |
+
"vocab_size": 49152
|
59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"batch_size": 2,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
23 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
24 |
+
"continuous_batching": true,
|
25 |
+
"enable_bucketing": false,
|
26 |
+
"ep_degree": 1,
|
27 |
+
"fused_qkv": true,
|
28 |
+
"glu_mlp": true,
|
29 |
+
"local_ranks_size": 2,
|
30 |
+
"logical_nc_config": 1,
|
31 |
+
"max_batch_size": 2,
|
32 |
+
"max_context_length": 1024,
|
33 |
+
"max_topk": 256,
|
34 |
+
"n_active_tokens": 1024,
|
35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
36 |
+
"on_device_sampling": true,
|
37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
38 |
+
"output_logits": false,
|
39 |
+
"pp_degree": 1,
|
40 |
+
"sequence_length": 1024,
|
41 |
+
"speculation_length": 0,
|
42 |
+
"start_rank_id": 0,
|
43 |
+
"target": null,
|
44 |
+
"torch_dtype": "float16",
|
45 |
+
"tp_degree": 2
|
46 |
+
},
|
47 |
+
"num_attention_heads": 4,
|
48 |
+
"num_hidden_layers": 2,
|
49 |
+
"num_key_value_heads": 4,
|
50 |
+
"pretraining_tp": 1,
|
51 |
+
"rms_norm_eps": 1e-05,
|
52 |
+
"rope_scaling": {
|
53 |
+
"factor": 8.0,
|
54 |
+
"high_freq_factor": 4.0,
|
55 |
+
"low_freq_factor": 1.0,
|
56 |
+
"original_max_position_embeddings": 8192,
|
57 |
+
"rope_type": "llama3"
|
58 |
+
},
|
59 |
+
"rope_theta": 500000.0,
|
60 |
+
"tie_word_embeddings": false,
|
61 |
+
"use_cache": true,
|
62 |
+
"vocab_size": 128256
|
63 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"batch_size": 1,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
23 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
24 |
+
"continuous_batching": false,
|
25 |
+
"enable_bucketing": false,
|
26 |
+
"ep_degree": 1,
|
27 |
+
"fused_qkv": true,
|
28 |
+
"glu_mlp": true,
|
29 |
+
"local_ranks_size": 2,
|
30 |
+
"logical_nc_config": 1,
|
31 |
+
"max_batch_size": 1,
|
32 |
+
"max_context_length": 1024,
|
33 |
+
"max_topk": 256,
|
34 |
+
"n_active_tokens": 1024,
|
35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
36 |
+
"on_device_sampling": true,
|
37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
38 |
+
"output_logits": false,
|
39 |
+
"pp_degree": 1,
|
40 |
+
"sequence_length": 1024,
|
41 |
+
"speculation_length": 0,
|
42 |
+
"start_rank_id": 0,
|
43 |
+
"target": null,
|
44 |
+
"torch_dtype": "bfloat16",
|
45 |
+
"tp_degree": 2
|
46 |
+
},
|
47 |
+
"num_attention_heads": 4,
|
48 |
+
"num_hidden_layers": 2,
|
49 |
+
"num_key_value_heads": 4,
|
50 |
+
"pretraining_tp": 1,
|
51 |
+
"rms_norm_eps": 1e-05,
|
52 |
+
"rope_scaling": {
|
53 |
+
"factor": 8.0,
|
54 |
+
"high_freq_factor": 4.0,
|
55 |
+
"low_freq_factor": 1.0,
|
56 |
+
"original_max_position_embeddings": 8192,
|
57 |
+
"rope_type": "llama3"
|
58 |
+
},
|
59 |
+
"rope_theta": 500000.0,
|
60 |
+
"tie_word_embeddings": false,
|
61 |
+
"use_cache": true,
|
62 |
+
"vocab_size": 128256
|
63 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 2048,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 8192,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"batch_size": 1,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
23 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
24 |
+
"continuous_batching": false,
|
25 |
+
"enable_bucketing": false,
|
26 |
+
"ep_degree": 1,
|
27 |
+
"fused_qkv": true,
|
28 |
+
"glu_mlp": true,
|
29 |
+
"local_ranks_size": 24,
|
30 |
+
"logical_nc_config": 1,
|
31 |
+
"max_batch_size": 1,
|
32 |
+
"max_context_length": 4096,
|
33 |
+
"max_topk": 256,
|
34 |
+
"n_active_tokens": 4096,
|
35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
36 |
+
"on_device_sampling": true,
|
37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
38 |
+
"output_logits": false,
|
39 |
+
"pp_degree": 1,
|
40 |
+
"sequence_length": 4096,
|
41 |
+
"speculation_length": 0,
|
42 |
+
"start_rank_id": 0,
|
43 |
+
"target": null,
|
44 |
+
"torch_dtype": "bfloat16",
|
45 |
+
"tp_degree": 24
|
46 |
+
},
|
47 |
+
"num_attention_heads": 32,
|
48 |
+
"num_hidden_layers": 16,
|
49 |
+
"num_key_value_heads": 8,
|
50 |
+
"pretraining_tp": 1,
|
51 |
+
"rms_norm_eps": 1e-05,
|
52 |
+
"rope_scaling": {
|
53 |
+
"factor": 32.0,
|
54 |
+
"high_freq_factor": 4.0,
|
55 |
+
"low_freq_factor": 1.0,
|
56 |
+
"original_max_position_embeddings": 8192,
|
57 |
+
"rope_type": "llama3"
|
58 |
+
},
|
59 |
+
"rope_theta": 500000.0,
|
60 |
+
"tie_word_embeddings": true,
|
61 |
+
"unsloth_fixed": true,
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 128256
|
64 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"batch_size": 1,
|
19 |
+
"capacity_factor": null,
|
20 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
21 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
22 |
+
"continuous_batching": false,
|
23 |
+
"enable_bucketing": false,
|
24 |
+
"ep_degree": 1,
|
25 |
+
"fused_qkv": false,
|
26 |
+
"glu_mlp": true,
|
27 |
+
"local_ranks_size": 2,
|
28 |
+
"logical_nc_config": 1,
|
29 |
+
"max_batch_size": 1,
|
30 |
+
"max_context_length": 1024,
|
31 |
+
"max_topk": 256,
|
32 |
+
"n_active_tokens": 1024,
|
33 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
34 |
+
"on_device_sampling": false,
|
35 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
36 |
+
"output_logits": false,
|
37 |
+
"pp_degree": 1,
|
38 |
+
"sequence_length": 1024,
|
39 |
+
"speculation_length": 0,
|
40 |
+
"start_rank_id": 0,
|
41 |
+
"target": null,
|
42 |
+
"torch_dtype": "bfloat16",
|
43 |
+
"tp_degree": 2
|
44 |
+
},
|
45 |
+
"num_attention_heads": 32,
|
46 |
+
"num_experts_per_tok": 2,
|
47 |
+
"num_hidden_layers": 2,
|
48 |
+
"num_key_value_heads": 8,
|
49 |
+
"num_local_experts": 8,
|
50 |
+
"output_router_logits": false,
|
51 |
+
"rms_norm_eps": 1e-05,
|
52 |
+
"rope_theta": 10000.0,
|
53 |
+
"router_aux_loss_coef": 0.001,
|
54 |
+
"router_jitter_noise": 0.0,
|
55 |
+
"sliding_window": 4096,
|
56 |
+
"tie_word_embeddings": false,
|
57 |
+
"use_cache": true,
|
58 |
+
"vocab_size": 32000
|
59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"batch_size": 2,
|
19 |
+
"capacity_factor": null,
|
20 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
21 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
22 |
+
"continuous_batching": false,
|
23 |
+
"enable_bucketing": false,
|
24 |
+
"ep_degree": 1,
|
25 |
+
"fused_qkv": false,
|
26 |
+
"glu_mlp": true,
|
27 |
+
"local_ranks_size": 2,
|
28 |
+
"logical_nc_config": 1,
|
29 |
+
"max_batch_size": 2,
|
30 |
+
"max_context_length": 1024,
|
31 |
+
"max_topk": 256,
|
32 |
+
"n_active_tokens": 1024,
|
33 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
34 |
+
"on_device_sampling": false,
|
35 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
36 |
+
"output_logits": false,
|
37 |
+
"pp_degree": 1,
|
38 |
+
"sequence_length": 1024,
|
39 |
+
"speculation_length": 0,
|
40 |
+
"start_rank_id": 0,
|
41 |
+
"target": null,
|
42 |
+
"torch_dtype": "float16",
|
43 |
+
"tp_degree": 2
|
44 |
+
},
|
45 |
+
"num_attention_heads": 32,
|
46 |
+
"num_experts_per_tok": 2,
|
47 |
+
"num_hidden_layers": 2,
|
48 |
+
"num_key_value_heads": 8,
|
49 |
+
"num_local_experts": 8,
|
50 |
+
"output_router_logits": false,
|
51 |
+
"rms_norm_eps": 1e-05,
|
52 |
+
"rope_theta": 10000.0,
|
53 |
+
"router_aux_loss_coef": 0.001,
|
54 |
+
"router_jitter_noise": 0.0,
|
55 |
+
"sliding_window": 4096,
|
56 |
+
"tie_word_embeddings": false,
|
57 |
+
"use_cache": true,
|
58 |
+
"vocab_size": 32000
|
59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Phi3ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"auto_map": {},
|
11 |
+
"embd_pdrop": 0.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 16,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 32,
|
16 |
+
"max_position_embeddings": 16384,
|
17 |
+
"model_type": "phi3",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"batch_size": 2,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
23 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
24 |
+
"continuous_batching": true,
|
25 |
+
"enable_bucketing": false,
|
26 |
+
"ep_degree": 1,
|
27 |
+
"fused_qkv": true,
|
28 |
+
"glu_mlp": true,
|
29 |
+
"local_ranks_size": 2,
|
30 |
+
"logical_nc_config": 1,
|
31 |
+
"max_batch_size": 2,
|
32 |
+
"max_context_length": 1024,
|
33 |
+
"max_topk": 256,
|
34 |
+
"n_active_tokens": 1024,
|
35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
36 |
+
"on_device_sampling": true,
|
37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
38 |
+
"output_logits": false,
|
39 |
+
"pp_degree": 1,
|
40 |
+
"sequence_length": 1024,
|
41 |
+
"speculation_length": 0,
|
42 |
+
"start_rank_id": 0,
|
43 |
+
"target": null,
|
44 |
+
"torch_dtype": "float16",
|
45 |
+
"tp_degree": 2
|
46 |
+
},
|
47 |
+
"num_attention_heads": 2,
|
48 |
+
"num_hidden_layers": 2,
|
49 |
+
"num_key_value_heads": 1,
|
50 |
+
"original_max_position_embeddings": 16384,
|
51 |
+
"partial_rotary_factor": 1.0,
|
52 |
+
"resid_pdrop": 0.0,
|
53 |
+
"rms_norm_eps": 1e-05,
|
54 |
+
"rope_scaling": null,
|
55 |
+
"rope_theta": 250000,
|
56 |
+
"sliding_window": null,
|
57 |
+
"tie_word_embeddings": false,
|
58 |
+
"use_cache": true,
|
59 |
+
"vocab_size": 100352
|
60 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Phi3ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"auto_map": {},
|
11 |
+
"embd_pdrop": 0.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 16,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 32,
|
16 |
+
"max_position_embeddings": 16384,
|
17 |
+
"model_type": "phi3",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"batch_size": 1,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
23 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
24 |
+
"continuous_batching": false,
|
25 |
+
"enable_bucketing": false,
|
26 |
+
"ep_degree": 1,
|
27 |
+
"fused_qkv": true,
|
28 |
+
"glu_mlp": true,
|
29 |
+
"local_ranks_size": 2,
|
30 |
+
"logical_nc_config": 1,
|
31 |
+
"max_batch_size": 1,
|
32 |
+
"max_context_length": 1024,
|
33 |
+
"max_topk": 256,
|
34 |
+
"n_active_tokens": 1024,
|
35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
36 |
+
"on_device_sampling": true,
|
37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
38 |
+
"output_logits": false,
|
39 |
+
"pp_degree": 1,
|
40 |
+
"sequence_length": 1024,
|
41 |
+
"speculation_length": 0,
|
42 |
+
"start_rank_id": 0,
|
43 |
+
"target": null,
|
44 |
+
"torch_dtype": "bfloat16",
|
45 |
+
"tp_degree": 2
|
46 |
+
},
|
47 |
+
"num_attention_heads": 2,
|
48 |
+
"num_hidden_layers": 2,
|
49 |
+
"num_key_value_heads": 1,
|
50 |
+
"original_max_position_embeddings": 16384,
|
51 |
+
"partial_rotary_factor": 1.0,
|
52 |
+
"resid_pdrop": 0.0,
|
53 |
+
"rms_norm_eps": 1e-05,
|
54 |
+
"rope_scaling": null,
|
55 |
+
"rope_theta": 250000,
|
56 |
+
"sliding_window": null,
|
57 |
+
"tie_word_embeddings": false,
|
58 |
+
"use_cache": true,
|
59 |
+
"vocab_size": 100352
|
60 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "Qwen/Qwen2.5-0.5B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 896,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 4864,
|
13 |
+
"layer_types": [
|
14 |
+
"full_attention",
|
15 |
+
"full_attention",
|
16 |
+
"full_attention",
|
17 |
+
"full_attention",
|
18 |
+
"full_attention",
|
19 |
+
"full_attention",
|
20 |
+
"full_attention",
|
21 |
+
"full_attention",
|
22 |
+
"full_attention",
|
23 |
+
"full_attention",
|
24 |
+
"full_attention",
|
25 |
+
"full_attention",
|
26 |
+
"full_attention",
|
27 |
+
"full_attention",
|
28 |
+
"full_attention",
|
29 |
+
"full_attention",
|
30 |
+
"full_attention",
|
31 |
+
"full_attention",
|
32 |
+
"full_attention",
|
33 |
+
"full_attention",
|
34 |
+
"full_attention",
|
35 |
+
"full_attention",
|
36 |
+
"full_attention",
|
37 |
+
"full_attention"
|
38 |
+
],
|
39 |
+
"max_position_embeddings": 32768,
|
40 |
+
"max_window_layers": 24,
|
41 |
+
"model_type": "qwen2",
|
42 |
+
"neuron": {
|
43 |
+
"_serialized_key": "NxDNeuronConfig",
|
44 |
+
"batch_size": 1,
|
45 |
+
"capacity_factor": null,
|
46 |
+
"checkpoint_id": "Qwen/Qwen2.5-0.5B",
|
47 |
+
"checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
|
48 |
+
"continuous_batching": false,
|
49 |
+
"enable_bucketing": false,
|
50 |
+
"ep_degree": 1,
|
51 |
+
"fused_qkv": false,
|
52 |
+
"glu_mlp": true,
|
53 |
+
"local_ranks_size": 2,
|
54 |
+
"logical_nc_config": 1,
|
55 |
+
"max_batch_size": 1,
|
56 |
+
"max_context_length": 4096,
|
57 |
+
"max_topk": 256,
|
58 |
+
"n_active_tokens": 4096,
|
59 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
60 |
+
"on_device_sampling": true,
|
61 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
62 |
+
"output_logits": false,
|
63 |
+
"pp_degree": 1,
|
64 |
+
"sequence_length": 4096,
|
65 |
+
"speculation_length": 0,
|
66 |
+
"start_rank_id": 0,
|
67 |
+
"target": null,
|
68 |
+
"torch_dtype": "bfloat16",
|
69 |
+
"tp_degree": 2
|
70 |
+
},
|
71 |
+
"num_attention_heads": 14,
|
72 |
+
"num_hidden_layers": 24,
|
73 |
+
"num_key_value_heads": 2,
|
74 |
+
"rms_norm_eps": 1e-06,
|
75 |
+
"rope_scaling": null,
|
76 |
+
"rope_theta": 1000000.0,
|
77 |
+
"sliding_window": null,
|
78 |
+
"tie_word_embeddings": true,
|
79 |
+
"use_cache": true,
|
80 |
+
"use_mrope": false,
|
81 |
+
"use_sliding_window": false,
|
82 |
+
"vocab_size": 151936
|
83 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 8,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16,
|
13 |
+
"layer_types": [
|
14 |
+
"full_attention",
|
15 |
+
"full_attention"
|
16 |
+
],
|
17 |
+
"max_position_embeddings": 32768,
|
18 |
+
"max_window_layers": 1,
|
19 |
+
"model_type": "qwen2",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "NxDNeuronConfig",
|
22 |
+
"batch_size": 2,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
25 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
26 |
+
"continuous_batching": true,
|
27 |
+
"enable_bucketing": false,
|
28 |
+
"ep_degree": 1,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"local_ranks_size": 2,
|
32 |
+
"logical_nc_config": 1,
|
33 |
+
"max_batch_size": 2,
|
34 |
+
"max_context_length": 1024,
|
35 |
+
"max_topk": 256,
|
36 |
+
"n_active_tokens": 1024,
|
37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
38 |
+
"on_device_sampling": false,
|
39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
40 |
+
"output_logits": false,
|
41 |
+
"pp_degree": 1,
|
42 |
+
"sequence_length": 1024,
|
43 |
+
"speculation_length": 0,
|
44 |
+
"start_rank_id": 0,
|
45 |
+
"target": null,
|
46 |
+
"torch_dtype": "float16",
|
47 |
+
"tp_degree": 2
|
48 |
+
},
|
49 |
+
"num_attention_heads": 4,
|
50 |
+
"num_hidden_layers": 2,
|
51 |
+
"num_key_value_heads": 2,
|
52 |
+
"rms_norm_eps": 1e-06,
|
53 |
+
"rope_scaling": {
|
54 |
+
"factor": 4.0,
|
55 |
+
"original_max_position_embeddings": 32768,
|
56 |
+
"rope_type": "yarn",
|
57 |
+
"type": "yarn"
|
58 |
+
},
|
59 |
+
"rope_theta": 1000000.0,
|
60 |
+
"sliding_window": null,
|
61 |
+
"tie_word_embeddings": false,
|
62 |
+
"use_cache": true,
|
63 |
+
"use_sliding_window": false,
|
64 |
+
"vocab_size": 152064
|
65 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 8,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16,
|
13 |
+
"layer_types": [
|
14 |
+
"full_attention",
|
15 |
+
"full_attention"
|
16 |
+
],
|
17 |
+
"max_position_embeddings": 32768,
|
18 |
+
"max_window_layers": 1,
|
19 |
+
"model_type": "qwen2",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "NxDNeuronConfig",
|
22 |
+
"batch_size": 1,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
25 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
26 |
+
"continuous_batching": false,
|
27 |
+
"enable_bucketing": false,
|
28 |
+
"ep_degree": 1,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"local_ranks_size": 2,
|
32 |
+
"logical_nc_config": 1,
|
33 |
+
"max_batch_size": 1,
|
34 |
+
"max_context_length": 1024,
|
35 |
+
"max_topk": 256,
|
36 |
+
"n_active_tokens": 1024,
|
37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
38 |
+
"on_device_sampling": true,
|
39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
40 |
+
"output_logits": false,
|
41 |
+
"pp_degree": 1,
|
42 |
+
"sequence_length": 1024,
|
43 |
+
"speculation_length": 0,
|
44 |
+
"start_rank_id": 0,
|
45 |
+
"target": null,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"tp_degree": 2
|
48 |
+
},
|
49 |
+
"num_attention_heads": 4,
|
50 |
+
"num_hidden_layers": 2,
|
51 |
+
"num_key_value_heads": 2,
|
52 |
+
"rms_norm_eps": 1e-06,
|
53 |
+
"rope_scaling": {
|
54 |
+
"factor": 4.0,
|
55 |
+
"original_max_position_embeddings": 32768,
|
56 |
+
"rope_type": "yarn",
|
57 |
+
"type": "yarn"
|
58 |
+
},
|
59 |
+
"rope_theta": 1000000.0,
|
60 |
+
"sliding_window": null,
|
61 |
+
"tie_word_embeddings": false,
|
62 |
+
"use_cache": true,
|
63 |
+
"use_sliding_window": false,
|
64 |
+
"vocab_size": 152064
|
65 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen3MoeForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"decoder_sparse_step": 2,
|
11 |
+
"head_dim": 32,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 64,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 128,
|
16 |
+
"max_position_embeddings": 40960,
|
17 |
+
"max_window_layers": 1,
|
18 |
+
"mlp_only_layers": [],
|
19 |
+
"model_type": "qwen3_moe",
|
20 |
+
"moe_intermediate_size": 128,
|
21 |
+
"neuron": {
|
22 |
+
"_serialized_key": "NxDNeuronConfig",
|
23 |
+
"batch_size": 2,
|
24 |
+
"capacity_factor": null,
|
25 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
26 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"fused_qkv": false,
|
31 |
+
"glu_mlp": true,
|
32 |
+
"local_ranks_size": 2,
|
33 |
+
"logical_nc_config": 1,
|
34 |
+
"max_batch_size": 2,
|
35 |
+
"max_context_length": 1024,
|
36 |
+
"max_topk": 256,
|
37 |
+
"n_active_tokens": 1024,
|
38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
39 |
+
"on_device_sampling": false,
|
40 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
41 |
+
"output_logits": false,
|
42 |
+
"pp_degree": 1,
|
43 |
+
"sequence_length": 1024,
|
44 |
+
"speculation_length": 0,
|
45 |
+
"start_rank_id": 0,
|
46 |
+
"target": null,
|
47 |
+
"torch_dtype": "float16",
|
48 |
+
"tp_degree": 2
|
49 |
+
},
|
50 |
+
"norm_topk_prob": true,
|
51 |
+
"num_attention_heads": 2,
|
52 |
+
"num_experts": 8,
|
53 |
+
"num_experts_per_tok": 2,
|
54 |
+
"num_hidden_layers": 2,
|
55 |
+
"num_key_value_heads": 1,
|
56 |
+
"output_router_logits": false,
|
57 |
+
"rms_norm_eps": 1e-06,
|
58 |
+
"rope_scaling": null,
|
59 |
+
"rope_theta": 1000000.0,
|
60 |
+
"router_aux_loss_coef": 0.001,
|
61 |
+
"sliding_window": null,
|
62 |
+
"tie_word_embeddings": true,
|
63 |
+
"use_cache": true,
|
64 |
+
"use_sliding_window": false,
|
65 |
+
"vocab_size": 151936
|
66 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen3MoeForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"decoder_sparse_step": 2,
|
11 |
+
"head_dim": 32,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 64,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 128,
|
16 |
+
"max_position_embeddings": 40960,
|
17 |
+
"max_window_layers": 1,
|
18 |
+
"mlp_only_layers": [],
|
19 |
+
"model_type": "qwen3_moe",
|
20 |
+
"moe_intermediate_size": 128,
|
21 |
+
"neuron": {
|
22 |
+
"_serialized_key": "NxDNeuronConfig",
|
23 |
+
"batch_size": 1,
|
24 |
+
"capacity_factor": null,
|
25 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
26 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"fused_qkv": false,
|
31 |
+
"glu_mlp": true,
|
32 |
+
"local_ranks_size": 2,
|
33 |
+
"logical_nc_config": 1,
|
34 |
+
"max_batch_size": 1,
|
35 |
+
"max_context_length": 1024,
|
36 |
+
"max_topk": 256,
|
37 |
+
"n_active_tokens": 1024,
|
38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
39 |
+
"on_device_sampling": false,
|
40 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
41 |
+
"output_logits": false,
|
42 |
+
"pp_degree": 1,
|
43 |
+
"sequence_length": 1024,
|
44 |
+
"speculation_length": 0,
|
45 |
+
"start_rank_id": 0,
|
46 |
+
"target": null,
|
47 |
+
"torch_dtype": "bfloat16",
|
48 |
+
"tp_degree": 2
|
49 |
+
},
|
50 |
+
"norm_topk_prob": true,
|
51 |
+
"num_attention_heads": 2,
|
52 |
+
"num_experts": 8,
|
53 |
+
"num_experts_per_tok": 2,
|
54 |
+
"num_hidden_layers": 2,
|
55 |
+
"num_key_value_heads": 1,
|
56 |
+
"output_router_logits": false,
|
57 |
+
"rms_norm_eps": 1e-06,
|
58 |
+
"rope_scaling": null,
|
59 |
+
"rope_theta": 1000000.0,
|
60 |
+
"router_aux_loss_coef": 0.001,
|
61 |
+
"sliding_window": null,
|
62 |
+
"tie_word_embeddings": true,
|
63 |
+
"use_cache": true,
|
64 |
+
"use_sliding_window": false,
|
65 |
+
"vocab_size": 151936
|
66 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd7dae1ac13e262e1a7f9472b6a31e792e9c78b1214495916d9e83666910cb83
|
3 |
+
size 567258
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f6d4d1dc08dd89019640186d5368b1a308f0bd285a59f3f2e2a97e2bfc50b89
|
3 |
+
size 14961664
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:beff4a7ab7f70afbb24a85c88ce24e5bc5cfae6de236e3f9686176defedd5222
|
3 |
+
size 81016
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc275f95550b0e1be60dc7ed4bbc81233eab5133b3b0af0f576bae41fda91887
|
3 |
+
size 297984
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62347f65c836de46507f6affd655cd1f15290142541de20acc56feac1fcef987
|
3 |
+
size 308436
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43d7647959b5592dd250b2a9f5c68330b6f6bca38bd34f9fb881d2958e03f881
|
3 |
+
size 83591
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75e76aff9b7791f1118cf7da468310c32943d4a8a828a53d19775178041d8deb
|
3 |
+
size 707584
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c66551e445c34b0e221ecb2b2b7303796c4e0eb8ca338b4c297bebe314fed43
|
3 |
+
size 69881
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:876faeaea6b0587c3ff8663d3114f70d0484e607ab88e735a275ad01c223cbc9
|
3 |
+
size 236544
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ebfa9813b63ed97dbe6925f3ba4e5a94ea93bd0ad008070dc32430f87d9bd72
|
3 |
+
size 247874
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2cfa8e742c4893766c3b63b6971e73469c20ed6ed959007a32f73a5d6e66751
|
3 |
+
size 81550
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: usage: neuronx-cc [-h] [--compatible-mode | --no-compatible-mode] [--disable-expensive-checks | --no-disable-expensive-checks]
|
2 |
+
[--fp16-bilinear-upsampling | --no-fp16-bilinear-upsampling] [--tensor-no-opt-pass [TENSOR_NO_OPT_PASS]] [--set-tensors-no-opt [SET_TENSORS_NO_OPT]]
|
3 |
+
[--model-specific-opt [MODEL_SPECIFIC_OPT]] [--statebuffer-scratch-size-in-bytes [STATEBUFFER_SCRATCH_SIZE_IN_BYTES]] [--target-mm-vec-size [TARGET_MM_VEC_SIZE]]
|
4 |
+
[--disable-global-redundant-load-elimination | --no-disable-global-redundant-load-elimination] [--avoid-loop-reduce | --no-avoid-loop-reduce]
|
5 |
+
[--disable-partition-vectorization | --no-disable-partition-vectorization]
|
6 |
+
[--disable-tiling-of-non-overlapping-mem-access | --no-disable-tiling-of-non-overlapping-mem-access] [--weight-coalescing-threshold [WEIGHT_COALESCING_THRESHOLD]]
|
7 |
+
[--static-weights | --no-static-weights] [--tensor-layout-p-order [TENSOR_LAYOUT_P_ORDER]] [--tensor-layout-b-order [TENSOR_LAYOUT_B_ORDER]]
|
8 |
+
[--tensor-layout-f-order [TENSOR_LAYOUT_F_ORDER]] [--fp32-cast [FP32_CAST]] [--enable-replication | --no-enable-replication]
|
9 |
+
[--use-inferentia-hwm | --no-use-inferentia-hwm] [--hbm-scratchpad-page-size-in-bytes [HBM_SCRATCHPAD_PAGE_SIZE_IN_BYTES]]
|
10 |
+
[--enable-tensorized-spiller | --no-enable-tensorized-spiller] [--disable-spill-free-kernels | --no-disable-spill-free-kernels]
|
11 |
+
[--enable-smt-allocator | --no-enable-smt-allocator] [--nki-manual-allocation | --no-nki-manual-allocation] [--enable-softmax-kernel | --no-enable-softmax-kernel]
|
12 |
+
[--softmax-division-delay | --no-softmax-division-delay] [--accumulate-on-alu-dtype | --no-accumulate-on-alu-dtype]
|
13 |
+
[--enable-shard-axis-verifier | --no-enable-shard-axis-verifier] [--non-local-tripcount-threshold [NON_LOCAL_TRIPCOUNT_THRESHOLD]]
|
14 |
+
[--force-non-local-tensors [FORCE_NON_LOCAL_TENSORS]] [--force-concat-to-non-local | --no-force-concat-to-non-local]
|
15 |
+
[--force-all-matmult-input-non-local | --no-force-all-matmult-input-non-local] [--large-1d-tensor-threshold [LARGE_1D_TENSOR_THRESHOLD]] [--dump-after [DUMP_AFTER]]
|
16 |
+
[--dump-path [DUMP_PATH]] [--dump-files | --no-dump-files] [--save-weights | --no-save-weights] [--dump-nki | --no-dump-nki]
|
17 |
+
[--auto-reduce-crash | --no-auto-reduce-crash] [--debug-mode | --no-debug-mode] [--profile-pass [PROFILE_PASS]] [--rollback-pass [ROLLBACK_PASS]]
|
18 |
+
[--skip-pass [SKIP_PASS]] [--debug-pass [DEBUG_PASS]] [--max-prefetch-size-in-bytes [MAX_PREFETCH_SIZE_IN_BYTES]]
|
19 |
+
[--max-indirect-dma-prefetch-size-in-bytes [MAX_INDIRECT_DMA_PREFETCH_SIZE_IN_BYTES]] [--max-statebuffer-tile-size-in-bytes [MAX_STATEBUFFER_TILE_SIZE_IN_BYTES]]
|
20 |
+
[--max-computation-tile-size [MAX_COMPUTATION_TILE_SIZE]] [--max-local-tensor-tile-size-in-bytes [MAX_LOCAL_TENSOR_TILE_SIZE_IN_BYTES]]
|
21 |
+
[--max-prefetch-buffer-size-in-bytes [MAX_PREFETCH_BUFFER_SIZE_IN_BYTES]] [--enable-trivial-dmacopy-transpose | --no-enable-trivial-dmacopy-transpose]
|
22 |
+
[--enable-dmacopy-transpose | --no-enable-dmacopy-transpose] [--target-arithmetic-intensity [TARGET_ARITHMETIC_INTENSITY]]
|
23 |
+
[--disable-experimental-addr-calc | --no-disable-experimental-addr-calc] [--pool-buffer-size [POOL_BUFFER_SIZE]] [--disable-new-scatter | --no-disable-new-scatter]
|
24 |
+
[--enable-stream-transpose | --no-enable-stream-transpose] [--enable-transpose-reduce | --no-enable-transpose-reduce]
|
25 |
+
[--enable-transpose-batchnormstats2 | --no-enable-transpose-batchnormstats2] [--force-transpose-batchnormstats2 | --no-force-transpose-batchnormstats2]
|
26 |
+
[--mm-transpose-type [MM_TRANSPOSE_TYPE]] [--enable-fp32-mm-transpose | --no-enable-fp32-mm-transpose] [--disable-dma-cast | --no-disable-dma-cast]
|
27 |
+
[--enable-8bit-tensorcopy-cast | --no-enable-8bit-tensorcopy-cast] [--min-allreduce-tile-size-in-byte [MIN_ALLREDUCE_TILE_SIZE_IN_BYTE]]
|
28 |
+
[--min-allgather-tile-size-in-byte [MIN_ALLGATHER_TILE_SIZE_IN_BYTE]] [--max-inflight-allreduce [MAX_INFLIGHT_ALLREDUCE]]
|
29 |
+
[--max-dma-access-free-depth [MAX_DMA_ACCESS_FREE_DEPTH]] [--dve-bn-stats-paritition-max-elements [DVE_BN_STATS_PARITITION_MAX_ELEMENTS]]
|
30 |
+
[--max-batch-norm-reduction-size [MAX_BATCH_NORM_REDUCTION_SIZE]] [--spmd | --no-spmd] [--prioritize-minimize-transpose | --no-prioritize-minimize-transpose]
|
31 |
+
[--enable-ccop-compute-overlap | --no-enable-ccop-compute-overlap] [--enable-fine-grained-ccop-compute-overlap | --no-enable-fine-grained-ccop-compute-overlap]
|
32 |
+
[--fine-grained-ccop-compute-channels-per-ccop [FINE_GRAINED_CCOP_COMPUTE_CHANNELS_PER_CCOP]]
|
33 |
+
[--enable-dse-after-mask-propagation | --no-enable-dse-after-mask-propagation] [--enable-dge-on-io-dma | --no-enable-dge-on-io-dma]
|
34 |
+
[--enable-dge-on-spill-reload-dma | --no-enable-dge-on-spill-reload-dma] [--enable-dge-on-indirect-dma | --no-enable-dge-on-indirect-dma]
|
35 |
+
[--enable-dge-on-vector-indirect-dma | --no-enable-dge-on-vector-indirect-dma] [--enable-dge-on-dst-reduce | --no-enable-dge-on-dst-reduce]
|
36 |
+
[--enable-scalar-dge-vectorization | --no-enable-scalar-dge-vectorization] [--enable-dram-to-dram-transpose | --no-enable-dram-to-dram-transpose]
|
37 |
+
[--run-pg-layout-and-tiling | --no-run-pg-layout-and-tiling] [--disable-delinearize-io-tensors | --no-disable-delinearize-io-tensors]
|
38 |
+
[--delinearize-tensor-maximum-rank [DELINEARIZE_TENSOR_MAXIMUM_RANK]] [--delinearize-min-dim-size [DELINEARIZE_MIN_DIM_SIZE]]
|
39 |
+
[--delinearize-maximum-loop-depth [DELINEARIZE_MAXIMUM_LOOP_DEPTH]] [--big-tensor-threshold-one-d-memcpy [BIG_TENSOR_THRESHOLD_ONE_D_MEMCPY]]
|
40 |
+
[--disable-degraded-fusion | --no-disable-degraded-fusion] [--disable-tensor-op-io-reshape | --no-disable-tensor-op-io-reshape]
|
41 |
+
[--disable-non-compatible-tensor-op-io-reshape | --no-disable-non-compatible-tensor-op-io-reshape] [--dont-delinearize-tensor | --no-dont-delinearize-tensor]
|
42 |
+
[--disable-single-row-matmult | --no-disable-single-row-matmult] [--disable-single-column-matmult | --no-disable-single-column-matmult]
|
43 |
+
[--enable-penguin-mac-count | --no-enable-penguin-mac-count] [--min-tc-threshold [MIN_TC_THRESHOLD]]
|
44 |
+
[--disable-dropout-pattern-match | --no-disable-dropout-pattern-match] [--set-dropout-rate-as-keep | --no-set-dropout-rate-as-keep]
|
45 |
+
[--enable-advanced-delinearization | --no-enable-advanced-delinearization] [--keep-rng-tensor-op | --no-keep-rng-tensor-op]
|
46 |
+
[--big-tensor-threshold-one-d [BIG_TENSOR_THRESHOLD_ONE_D]] [--bir-json-version [BIR_JSON_VERSION]] [--dump-ccop-axes-group-graph | --no-dump-ccop-axes-group-graph]
|
47 |
+
[--cnn-training-model | --no-cnn-training-model] [--enable-all-reduce-axes-as-par | --no-enable-all-reduce-axes-as-par]
|
48 |
+
[--enable-pag-based-layout-analysis | --no-enable-pag-based-layout-analysis] [--enable-tiling-visualization | --no-enable-tiling-visualization]
|
49 |
+
[--enable-edge-dump | --no-enable-edge-dump] [--override-pg-tile-size [OVERRIDE_PG_TILE_SIZE]] [--enable-p-to-pp-broadcast | --no-enable-p-to-pp-broadcast]
|
50 |
+
[--partial-loop-fusion-max-iter [PARTIAL_LOOP_FUSION_MAX_ITER]] [--cast-to-round | --no-cast-to-round] [--keep-remat-dma-transpose | --no-keep-remat-dma-transpose]
|
51 |
+
[--disable-lower-transpose-to-shuffle | --no-disable-lower-transpose-to-shuffle] [--disable-bitcasted-transpose | --no-disable-bitcasted-transpose]
|
52 |
+
[--enable-bitcasted-transpose-all | --no-enable-bitcasted-transpose-all] [--enable-saturation-convert | --no-enable-saturation-convert]
|
53 |
+
[--max-tiling-permutation [MAX_TILING_PERMUTATION]] [--loop-order-heuristic [LOOP_ORDER_HEURISTIC]] [--disable-max-stride-tiling | --no-disable-max-stride-tiling]
|
54 |
+
[--flatten-single-column-dma | --no-flatten-single-column-dma] [--keep-builtins [KEEP_BUILTINS]] [--experimental-gpsimd-library [EXPERIMENTAL_GPSIMD_LIBRARY]]
|
55 |
+
[--internal_dynamic_dma_scratch_size_per_partition [INTERNAL_DYNAMIC_DMA_SCRATCH_SIZE_PER_PARTITION]]
|
56 |
+
[--internal-allow-rmsnorm-cascaded-reduce | --no-internal-allow-rmsnorm-cascaded-reduce] [--softmax-epsilon [SOFTMAX_EPSILON]]
|
57 |
+
[--max-dma-duplication [MAX_DMA_DUPLICATION]] [--max-weight-rewrite-permutation [MAX_WEIGHT_REWRITE_PERMUTATION]]
|
58 |
+
[--log-tiling-bottleneck-info | --no-log-tiling-bottleneck-info] [--inst-count-limit [INST_COUNT_LIMIT]] [--macro-instance-limit [MACRO_INSTANCE_LIMIT]]
|
59 |
+
[--always-transpose | --no-always-transpose] [--enable-prefetch-block-tensors | --no-enable-prefetch-block-tensors]
|
60 |
+
[--max-dma-legalization-permutation [MAX_DMA_LEGALIZATION_PERMUTATION]] [--disable-vectorize-dge-dma | --vectorize-dge-dma]
|
61 |
+
[--eager-tkg-vectorize-dma | --no-eager-tkg-vectorize-dma] [--no-fine-grained-cc-spill | --no-no-fine-grained-cc-spill]
|
62 |
+
[--layout-complexity-warning-threshold [LAYOUT_COMPLEXITY_WARNING_THRESHOLD]] [--partition const dim candidate threshold [PARTITION CONST DIM CANDIDATE THRESHOLD]]
|
63 |
+
[--run-layout-viewer | --no-run-layout-viewer] [--non-local-num-loadstores-threshold [NON_LOCAL_NUM_LOADSTORES_THRESHOLD]]
|
64 |
+
[--disable-degraded-flatten-axes | --no-disable-degraded-flatten-axes] [--use-accurate-reduce-cost-model | --no-use-accurate-reduce-cost-model]
|
65 |
+
[--visualize-detailed-pag-graph | --no-visualize-detailed-pag-graph] [--visualize-simplified-pag-graph | --no-visualize-simplified-pag-graph]
|
66 |
+
[--visualize-undecided-cc-graph | --no-visualize-undecided-cc-graph] [--disable-prefer-par-on-non-broadcast | --no-disable-prefer-par-on-non-broadcast]
|
67 |
+
[--cycle-based-layout-solution-size-threshold [CYCLE_BASED_LAYOUT_SOLUTION_SIZE_THRESHOLD]]
|
68 |
+
[--split-ucc-tensor-size-threshold-in-bytes [SPLIT_UCC_TENSOR_SIZE_THRESHOLD_IN_BYTES]] [--minimum-legal-par-tripcount [MINIMUM_LEGAL_PAR_TRIPCOUNT]]
|
69 |
+
[--operator-fution-split-ratio [OPERATOR_FUTION_SPLIT_RATIO]] [--keep-tensor-names | --no-keep-tensor-names] [--show-scalar-values | --no-show-scalar-values]
|
70 |
+
[--one-tensor-per-line | --no-one-tensor-per-line] [--no-ssa-style | --no-no-ssa-style] [--no-collapse-like-dims | --no-no-collapse-like-dims]
|
71 |
+
[--keep-offloaded-mem-intrinsics | --no-keep-offloaded-mem-intrinsics] [--no-color-terminal | --no-no-color-terminal]
|
72 |
+
[--dump-sharding-decision-graph | --no-dump-sharding-decision-graph] [--shard-axes [SHARD_AXES]]
|
73 |
+
[--experimental-sharding-propagation | --no-experimental-sharding-propagation] [--mem-bound-ratio-for-mm-sharding [MEM_BOUND_RATIO_FOR_MM_SHARDING]]
|
74 |
+
[--enable-lower-shard-axis-before-fusion | --no-enable-lower-shard-axis-before-fusion] [--enable-nki-attention-kernel | --no-enable-nki-attention-kernel]
|
75 |
+
[--enable-software-pipelining | --no-enable-software-pipelining] [--internal-lnc-pad-sendrecv | --no-internal-lnc-pad-sendrecv]
|
76 |
+
[--enable-send-recv-cce | --no-enable-send-recv-cce] [--use-ilp-layout-search | --no-use-ilp-layout-search]
|
77 |
+
[--set-nki-shard-on-producer-consumer | --no-set-nki-shard-on-producer-consumer]
|
78 |
+
[--insert-offloaded-transpose-dma-free-threshold [INSERT_OFFLOADED_TRANSPOSE_DMA_FREE_THRESHOLD]] [--enable-cast-in-select | --no-enable-cast-in-select]
|
79 |
+
[--delinear-contract-dim | --no-delinear-contract-dim] [--vectorize-partitions | --no-vectorize-partitions]
|
80 |
+
[--internal-disable-double-row-gen3 | --no-internal-disable-double-row-gen3] [--internal-autotune | --no-internal-autotune]
|
81 |
+
[--internal-autotune-config [INTERNAL_AUTOTUNE_CONFIG]] [--internal-autotune-subprocess [INTERNAL_AUTOTUNE_SUBPROCESS]]
|
82 |
+
[--internal-autotune-extraction-process [INTERNAL_AUTOTUNE_EXTRACTION_PROCESS]] [--tf-dma-size-in-bytes [TF_DMA_SIZE_IN_BYTES]]
|
83 |
+
[--tf-low-memory-pressure-threshold [TF_LOW_MEMORY_PRESSURE_THRESHOLD]] [--enable-isl-in-injective-check | --no-enable-isl-in-injective-check]
|
84 |
+
[--enable-symbolic-memory-pressure-estimation-tf | --no-enable-symbolic-memory-pressure-estimation-tf]
|
85 |
+
[--allow-ccrank-axis-tritium-fusion | --no-allow-ccrank-axis-tritium-fusion]
|
86 |
+
[--internal-autotune-tritium-use-more-tripcounts | --no-internal-autotune-tritium-use-more-tripcounts]
|
87 |
+
[--internal-autotune-tritium-only-with-id [INTERNAL_AUTOTUNE_TRITIUM_ONLY_WITH_ID]] [--vectorize-strided-dma | --no-vectorize-strided-dma]
|
88 |
+
[--profile-smt | --no-profile-smt] [--number-of-devices [NUMBER_OF_DEVICES]] [--cc-pipeline-tiling-factor [CC_PIPELINE_TILING_FACTOR]]
|
89 |
+
[--no-cc-pipeline-tiling-for-fsdp | --no-no-cc-pipeline-tiling-for-fsdp] [--cc-pipeline-tiling-for-fsdp-only | --no-cc-pipeline-tiling-for-fsdp-only]
|
90 |
+
[--experimental-convolution-kernel-match | --no-experimental-convolution-kernel-match] [--disable-inline-cast | --no-disable-inline-cast]
|
91 |
+
[--disable-affine-select | --no-disable-affine-select] [--profile-memory-pressure | --no-profile-memory-pressure]
|
92 |
+
[--report-n-lowest-utilization [REPORT_N_LOWEST_UTILIZATION]] [--vectorize-direct-dma | --no-vectorize-direct-dma]
|
93 |
+
[--log-top-n-latency-dmas [LOG_TOP_N_LATENCY_DMAS]] [--low-psum-usage-threshold [LOW_PSUM_USAGE_THRESHOLD]]
|
94 |
+
[--warn-parallelism-threshold [WARN_PARALLELISM_THRESHOLD]] [--disable-square-matmul | --no-disable-square-matmul]
|
95 |
+
[--disable-vector-transpose | --no-disable-vector-transpose] [--disable-software-replication | --no-disable-software-replication]
|
96 |
+
[--internal-disable-fma-on-ios | --no-internal-disable-fma-on-ios] [--nki-dl | --no-nki-dl] [--disable-tiling-allreduce | --no-disable-tiling-allreduce]
|
97 |
+
[--annotate-no-spill-hint | --no-annotate-no-spill-hint] [--print-nki | --no-print-nki] [--nki-debug-mode | --no-nki-debug-mode]
|
98 |
+
[--ccop-bucketing | --no-ccop-bucketing] [--fp32-cast-input-tensors | --no-fp32-cast-input-tensors] [--enable-tritium-loopfusion | --no-enable-tritium-loopfusion]
|
99 |
+
[--enable-ternary-fission | --no-enable-ternary-fission] [--disable-insert-implicit-shard-axis | --no-disable-insert-implicit-shard-axis]
|
100 |
+
[--enable-hoist-wlo-all-gather | --no-enable-hoist-wlo-all-gather] [--enable-hoist-fsdp-collectives | --no-enable-hoist-fsdp-collectives]
|
101 |
+
[--disable-concat-delinearizer | --no-disable-concat-delinearizer] [--enable-aliasing-dependency-verifier | --no-enable-aliasing-dependency-verifier]
|
102 |
+
[--enable-must-alias-to-iobuffer | --no-enable-must-alias-to-iobuffer] [--disable-partition-locality-tiling | --no-disable-partition-locality-tiling]
|
103 |
+
[--enable-memory-pressure-driven-loop-fusion | --no-enable-memory-pressure-driven-loop-fusion] [--legalize-tensor-tensor-op | --no-legalize-tensor-tensor-op]
|
104 |
+
[--layout-transform-heuristic [LAYOUT_TRANSFORM_HEURISTIC]] [--disable-bir-codegen-loadstore | --no-disable-bir-codegen-loadstore]
|
105 |
+
[--dump-tensorizer-bir-json | --no-dump-tensorizer-bir-json] [--disable-rank-id-rewriting | --no-disable-rank-id-rewriting]
|
106 |
+
[--vectorization-size [VECTORIZATION_SIZE]] [--atol [ATOL]] [--rtol [RTOL]] [--save-locals | --no-save-locals]
|
107 |
+
[--no-simplify-before-simulation | --no-no-simplify-before-simulation] [--correct-precision-mode | --no-correct-precision-mode]
|
108 |
+
[--dont-verify-after-all | --no-dont-verify-after-all] [--disable-debug-info-dump | --no-disable-debug-info-dump] [--run-pass-list [RUN_PASS_LIST]]
|
109 |
+
[--dump-pass-list [DUMP_PASS_LIST]] [--dump-pass-list-and-exit | --no-dump-pass-list-and-exit] [--print-stats | --no-print-stats]
|
110 |
+
[--run-simulator-after [RUN_SIMULATOR_AFTER]] [--enable-peephole-inst-combine | --no-enable-peephole-inst-combine]
|
111 |
+
[--enable-repartitioning | --no-enable-repartitioning] [--no-ccop-barrier | --no-no-ccop-barrier]
|
112 |
+
[--enable-iobuffer-to-must-alias | --no-enable-iobuffer-to-must-alias] [--custom-script [CUSTOM_SCRIPT]] [--enable-bir-converter [ENABLE_BIR_CONVERTER]]
|
113 |
+
[--custom-compute [CUSTOM_COMPUTE]] [--enable-bircodegen-unroll [ENABLE_BIRCODEGEN_UNROLL]] [--fuse-param-to-neff | --no-fuse-param-to-neff]
|
114 |
+
[--only-compile-subgraph [ONLY_COMPILE_SUBGRAPH]] [--model-type-transformer | --no-model-type-transformer] [--model-type-cnn-training | --no-model-type-cnn-training]
|
115 |
+
[--distribution-type-llm-training | --no-distribution-type-llm-training] [--num-neuroncores-per-sengine [NUM_NEURONCORES_PER_SENGINE]]
|
116 |
+
neuronx-cc: error: argument --cc-pipeline-tiling-factor: invalid int value: '2--vectorize-strided-dma'
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73521137a0627d1cbcf3276af2044ea2e025b43384d5ba149c1ee9f28e06ae23
|
3 |
+
size 88353
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b66d4b1a0323238c3a55ab0dc7f54b938f398ac7b79ed19aed77d248df0ddc12
|
3 |
+
size 308224
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e68080a74eeee69a4cca47ffb58ac94d4475079ee317556d3c4985d658030a7
|
3 |
+
size 52641
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1faffbfaeb70a15963aaa15126906cc088054168366d15b32f56ff2235d63f96
|
3 |
+
size 185344
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f75310b79ede900f855f7d33db734190e3d8b1f8c38de1d620c7660449b8abde
|
3 |
+
size 195539
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91462061b8dd8d78b36005464d30793234a3b8d0d65025605c7a747756879de4
|
3 |
+
size 88814
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fccf83e1f68a07690baf4dba83c9edb5a4a0dcb4f9b868628e780b46605ac229
|
3 |
+
size 236544
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|