dacorvo HF Staff commited on
Commit
6fb6d15
·
verified ·
1 Parent(s): 6ff0176

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +42 -0
  2. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json +59 -0
  3. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json +59 -0
  4. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json +63 -0
  5. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json +63 -0
  6. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json +64 -0
  7. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json +59 -0
  8. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json +59 -0
  9. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json +60 -0
  10. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json +60 -0
  11. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json +83 -0
  12. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json +65 -0
  13. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json +65 -0
  14. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json +66 -0
  15. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json +66 -0
  16. neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json +1 -0
  17. neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done +0 -0
  18. neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb +3 -0
  19. neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff +3 -0
  20. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json +1 -0
  21. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done +0 -0
  22. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb +3 -0
  23. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff +3 -0
  24. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo +3 -0
  25. neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json +1 -0
  26. neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done +0 -0
  27. neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb +3 -0
  28. neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff +3 -0
  29. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json +1 -0
  30. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done +0 -0
  31. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb +3 -0
  32. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff +3 -0
  33. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo +3 -0
  34. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json +1 -0
  35. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb +3 -0
  36. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log +116 -0
  37. neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json +1 -0
  38. neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done +0 -0
  39. neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb +3 -0
  40. neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff +3 -0
  41. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json +1 -0
  42. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done +0 -0
  43. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb +3 -0
  44. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff +3 -0
  45. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo +3 -0
  46. neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json +1 -0
  47. neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done +0 -0
  48. neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb +3 -0
  49. neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff +3 -0
  50. neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json +1 -0
.gitattributes CHANGED
@@ -4516,3 +4516,45 @@ neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/model.neff f
4516
  neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4517
  neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
4518
  neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4516
  neuronxcc-2.20.9961.0+0acef03a/MODULE_57ea1fad0cfb9ddd41c1+df19c9f3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4517
  neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/model.neff filter=lfs diff=lfs merge=lfs -text
4518
  neuronxcc-2.20.9961.0+0acef03a/MODULE_a06fa11271d76cc4676d+80826760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4519
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4520
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
4521
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4522
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
4523
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
4524
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4525
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4526
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4527
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4528
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4529
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
4530
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4531
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4532
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4533
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4534
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4535
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4536
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
4537
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4538
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4539
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4540
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4541
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4542
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4543
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4544
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4545
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4546
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
4547
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4548
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4549
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4550
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4551
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4552
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4553
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4554
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
4555
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4556
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
4557
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4558
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4559
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4560
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
25
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
26
+ "continuous_batching": true,
27
+ "enable_bucketing": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "logical_nc_config": 1,
33
+ "max_batch_size": 2,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.3.1.dev5",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": null,
46
+ "torch_dtype": "float16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 4,
50
+ "num_hidden_layers": 2,
51
+ "num_key_value_heads": 4,
52
+ "residual_multiplier": 1.0,
53
+ "rms_norm_eps": 1e-06,
54
+ "rope_scaling": null,
55
+ "rope_theta": 10000.0,
56
+ "tie_word_embeddings": false,
57
+ "use_cache": true,
58
+ "vocab_size": 49152
59
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
25
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
26
+ "continuous_batching": false,
27
+ "enable_bucketing": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "logical_nc_config": 1,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.3.1.dev5",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": null,
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 4,
50
+ "num_hidden_layers": 2,
51
+ "num_key_value_heads": 4,
52
+ "residual_multiplier": 1.0,
53
+ "rms_norm_eps": 1e-06,
54
+ "rope_scaling": null,
55
+ "rope_theta": 10000.0,
56
+ "tie_word_embeddings": false,
57
+ "use_cache": true,
58
+ "vocab_size": 49152
59
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
23
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
24
+ "continuous_batching": true,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "logical_nc_config": 1,
31
+ "max_batch_size": 2,
32
+ "max_context_length": 1024,
33
+ "max_topk": 256,
34
+ "n_active_tokens": 1024,
35
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
36
+ "on_device_sampling": true,
37
+ "optimum_neuron_version": "0.3.1.dev5",
38
+ "output_logits": false,
39
+ "pp_degree": 1,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": null,
44
+ "torch_dtype": "float16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 4,
48
+ "num_hidden_layers": 2,
49
+ "num_key_value_heads": 4,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 8.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
23
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
24
+ "continuous_batching": false,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "logical_nc_config": 1,
31
+ "max_batch_size": 1,
32
+ "max_context_length": 1024,
33
+ "max_topk": 256,
34
+ "n_active_tokens": 1024,
35
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
36
+ "on_device_sampling": true,
37
+ "optimum_neuron_version": "0.3.1.dev5",
38
+ "output_logits": false,
39
+ "pp_degree": 1,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": null,
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 4,
48
+ "num_hidden_layers": 2,
49
+ "num_key_value_heads": 4,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 8.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
23
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
24
+ "continuous_batching": false,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 24,
30
+ "logical_nc_config": 1,
31
+ "max_batch_size": 1,
32
+ "max_context_length": 4096,
33
+ "max_topk": 256,
34
+ "n_active_tokens": 4096,
35
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
36
+ "on_device_sampling": true,
37
+ "optimum_neuron_version": "0.3.1.dev5",
38
+ "output_logits": false,
39
+ "pp_degree": 1,
40
+ "sequence_length": 4096,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": null,
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 24
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "batch_size": 1,
19
+ "capacity_factor": null,
20
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
21
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
22
+ "continuous_batching": false,
23
+ "enable_bucketing": false,
24
+ "ep_degree": 1,
25
+ "fused_qkv": false,
26
+ "glu_mlp": true,
27
+ "local_ranks_size": 2,
28
+ "logical_nc_config": 1,
29
+ "max_batch_size": 1,
30
+ "max_context_length": 1024,
31
+ "max_topk": 256,
32
+ "n_active_tokens": 1024,
33
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
34
+ "on_device_sampling": false,
35
+ "optimum_neuron_version": "0.3.1.dev5",
36
+ "output_logits": false,
37
+ "pp_degree": 1,
38
+ "sequence_length": 1024,
39
+ "speculation_length": 0,
40
+ "start_rank_id": 0,
41
+ "target": null,
42
+ "torch_dtype": "bfloat16",
43
+ "tp_degree": 2
44
+ },
45
+ "num_attention_heads": 32,
46
+ "num_experts_per_tok": 2,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 8,
49
+ "num_local_experts": 8,
50
+ "output_router_logits": false,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_theta": 10000.0,
53
+ "router_aux_loss_coef": 0.001,
54
+ "router_jitter_noise": 0.0,
55
+ "sliding_window": 4096,
56
+ "tie_word_embeddings": false,
57
+ "use_cache": true,
58
+ "vocab_size": 32000
59
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "batch_size": 2,
19
+ "capacity_factor": null,
20
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
21
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
22
+ "continuous_batching": false,
23
+ "enable_bucketing": false,
24
+ "ep_degree": 1,
25
+ "fused_qkv": false,
26
+ "glu_mlp": true,
27
+ "local_ranks_size": 2,
28
+ "logical_nc_config": 1,
29
+ "max_batch_size": 2,
30
+ "max_context_length": 1024,
31
+ "max_topk": 256,
32
+ "n_active_tokens": 1024,
33
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
34
+ "on_device_sampling": false,
35
+ "optimum_neuron_version": "0.3.1.dev5",
36
+ "output_logits": false,
37
+ "pp_degree": 1,
38
+ "sequence_length": 1024,
39
+ "speculation_length": 0,
40
+ "start_rank_id": 0,
41
+ "target": null,
42
+ "torch_dtype": "float16",
43
+ "tp_degree": 2
44
+ },
45
+ "num_attention_heads": 32,
46
+ "num_experts_per_tok": 2,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 8,
49
+ "num_local_experts": 8,
50
+ "output_router_logits": false,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_theta": 10000.0,
53
+ "router_aux_loss_coef": 0.001,
54
+ "router_jitter_noise": 0.0,
55
+ "sliding_window": 4096,
56
+ "tie_word_embeddings": false,
57
+ "use_cache": true,
58
+ "vocab_size": 32000
59
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
23
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
24
+ "continuous_batching": true,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "logical_nc_config": 1,
31
+ "max_batch_size": 2,
32
+ "max_context_length": 1024,
33
+ "max_topk": 256,
34
+ "n_active_tokens": 1024,
35
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
36
+ "on_device_sampling": true,
37
+ "optimum_neuron_version": "0.3.1.dev5",
38
+ "output_logits": false,
39
+ "pp_degree": 1,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": null,
44
+ "torch_dtype": "float16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 2,
48
+ "num_hidden_layers": 2,
49
+ "num_key_value_heads": 1,
50
+ "original_max_position_embeddings": 16384,
51
+ "partial_rotary_factor": 1.0,
52
+ "resid_pdrop": 0.0,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 250000,
56
+ "sliding_window": null,
57
+ "tie_word_embeddings": false,
58
+ "use_cache": true,
59
+ "vocab_size": 100352
60
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
23
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
24
+ "continuous_batching": false,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "logical_nc_config": 1,
31
+ "max_batch_size": 1,
32
+ "max_context_length": 1024,
33
+ "max_topk": 256,
34
+ "n_active_tokens": 1024,
35
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
36
+ "on_device_sampling": true,
37
+ "optimum_neuron_version": "0.3.1.dev5",
38
+ "output_logits": false,
39
+ "pp_degree": 1,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": null,
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 2,
48
+ "num_hidden_layers": 2,
49
+ "num_key_value_heads": 1,
50
+ "original_max_position_embeddings": 16384,
51
+ "partial_rotary_factor": 1.0,
52
+ "resid_pdrop": 0.0,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 250000,
56
+ "sliding_window": null,
57
+ "tie_word_embeddings": false,
58
+ "use_cache": true,
59
+ "vocab_size": 100352
60
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/362608c65859fa989b0c.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "max_window_layers": 24,
41
+ "model_type": "qwen2",
42
+ "neuron": {
43
+ "_serialized_key": "NxDNeuronConfig",
44
+ "batch_size": 1,
45
+ "capacity_factor": null,
46
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
47
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
48
+ "continuous_batching": false,
49
+ "enable_bucketing": false,
50
+ "ep_degree": 1,
51
+ "fused_qkv": false,
52
+ "glu_mlp": true,
53
+ "local_ranks_size": 2,
54
+ "logical_nc_config": 1,
55
+ "max_batch_size": 1,
56
+ "max_context_length": 4096,
57
+ "max_topk": 256,
58
+ "n_active_tokens": 4096,
59
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
60
+ "on_device_sampling": true,
61
+ "optimum_neuron_version": "0.3.1.dev5",
62
+ "output_logits": false,
63
+ "pp_degree": 1,
64
+ "sequence_length": 4096,
65
+ "speculation_length": 0,
66
+ "start_rank_id": 0,
67
+ "target": null,
68
+ "torch_dtype": "bfloat16",
69
+ "tp_degree": 2
70
+ },
71
+ "num_attention_heads": 14,
72
+ "num_hidden_layers": 24,
73
+ "num_key_value_heads": 2,
74
+ "rms_norm_eps": 1e-06,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000.0,
77
+ "sliding_window": null,
78
+ "tie_word_embeddings": true,
79
+ "use_cache": true,
80
+ "use_mrope": false,
81
+ "use_sliding_window": false,
82
+ "vocab_size": 151936
83
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
25
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
26
+ "continuous_batching": true,
27
+ "enable_bucketing": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "logical_nc_config": 1,
33
+ "max_batch_size": 2,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
38
+ "on_device_sampling": false,
39
+ "optimum_neuron_version": "0.3.1.dev5",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": null,
46
+ "torch_dtype": "float16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 4,
50
+ "num_hidden_layers": 2,
51
+ "num_key_value_heads": 2,
52
+ "rms_norm_eps": 1e-06,
53
+ "rope_scaling": {
54
+ "factor": 4.0,
55
+ "original_max_position_embeddings": 32768,
56
+ "rope_type": "yarn",
57
+ "type": "yarn"
58
+ },
59
+ "rope_theta": 1000000.0,
60
+ "sliding_window": null,
61
+ "tie_word_embeddings": false,
62
+ "use_cache": true,
63
+ "use_sliding_window": false,
64
+ "vocab_size": 152064
65
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
25
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
26
+ "continuous_batching": false,
27
+ "enable_bucketing": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "logical_nc_config": 1,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.3.1.dev5",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": null,
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 4,
50
+ "num_hidden_layers": 2,
51
+ "num_key_value_heads": 2,
52
+ "rms_norm_eps": 1e-06,
53
+ "rope_scaling": {
54
+ "factor": 4.0,
55
+ "original_max_position_embeddings": 32768,
56
+ "rope_type": "yarn",
57
+ "type": "yarn"
58
+ },
59
+ "rope_theta": 1000000.0,
60
+ "sliding_window": null,
61
+ "tie_word_embeddings": false,
62
+ "use_cache": true,
63
+ "use_sliding_window": false,
64
+ "vocab_size": 152064
65
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 2,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
26
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": false,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 1024,
36
+ "max_topk": 256,
37
+ "n_active_tokens": 1024,
38
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
39
+ "on_device_sampling": false,
40
+ "optimum_neuron_version": "0.3.1.dev5",
41
+ "output_logits": false,
42
+ "pp_degree": 1,
43
+ "sequence_length": 1024,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": null,
47
+ "torch_dtype": "float16",
48
+ "tp_degree": 2
49
+ },
50
+ "norm_topk_prob": true,
51
+ "num_attention_heads": 2,
52
+ "num_experts": 8,
53
+ "num_experts_per_tok": 2,
54
+ "num_hidden_layers": 2,
55
+ "num_key_value_heads": 1,
56
+ "output_router_logits": false,
57
+ "rms_norm_eps": 1e-06,
58
+ "rope_scaling": null,
59
+ "rope_theta": 1000000.0,
60
+ "router_aux_loss_coef": 0.001,
61
+ "sliding_window": null,
62
+ "tie_word_embeddings": true,
63
+ "use_cache": true,
64
+ "use_sliding_window": false,
65
+ "vocab_size": 151936
66
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 1,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
26
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": false,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 1024,
36
+ "max_topk": 256,
37
+ "n_active_tokens": 1024,
38
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
39
+ "on_device_sampling": false,
40
+ "optimum_neuron_version": "0.3.1.dev5",
41
+ "output_logits": false,
42
+ "pp_degree": 1,
43
+ "sequence_length": 1024,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": null,
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "norm_topk_prob": true,
51
+ "num_attention_heads": 2,
52
+ "num_experts": 8,
53
+ "num_experts_per_tok": 2,
54
+ "num_hidden_layers": 2,
55
+ "num_key_value_heads": 1,
56
+ "output_router_logits": false,
57
+ "rms_norm_eps": 1e-06,
58
+ "rope_scaling": null,
59
+ "rope_theta": 1000000.0,
60
+ "router_aux_loss_coef": 0.001,
61
+ "sliding_window": null,
62
+ "tie_word_embeddings": true,
63
+ "use_cache": true,
64
+ "use_sliding_window": false,
65
+ "vocab_size": 151936
66
+ }
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7dae1ac13e262e1a7f9472b6a31e792e9c78b1214495916d9e83666910cb83
3
+ size 567258
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6d4d1dc08dd89019640186d5368b1a308f0bd285a59f3f2e2a97e2bfc50b89
3
+ size 14961664
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beff4a7ab7f70afbb24a85c88ce24e5bc5cfae6de236e3f9686176defedd5222
3
+ size 81016
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc275f95550b0e1be60dc7ed4bbc81233eab5133b3b0af0f576bae41fda91887
3
+ size 297984
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62347f65c836de46507f6affd655cd1f15290142541de20acc56feac1fcef987
3
+ size 308436
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d7647959b5592dd250b2a9f5c68330b6f6bca38bd34f9fb881d2958e03f881
3
+ size 83591
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e76aff9b7791f1118cf7da468310c32943d4a8a828a53d19775178041d8deb
3
+ size 707584
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c66551e445c34b0e221ecb2b2b7303796c4e0eb8ca338b4c297bebe314fed43
3
+ size 69881
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876faeaea6b0587c3ff8663d3114f70d0484e607ab88e735a275ad01c223cbc9
3
+ size 236544
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ebfa9813b63ed97dbe6925f3ba4e5a94ea93bd0ad008070dc32430f87d9bd72
3
+ size 247874
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cfa8e742c4893766c3b63b6971e73469c20ed6ed959007a32f73a5d6e66751
3
+ size 81550
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: usage: neuronx-cc [-h] [--compatible-mode | --no-compatible-mode] [--disable-expensive-checks | --no-disable-expensive-checks]
2
+ [--fp16-bilinear-upsampling | --no-fp16-bilinear-upsampling] [--tensor-no-opt-pass [TENSOR_NO_OPT_PASS]] [--set-tensors-no-opt [SET_TENSORS_NO_OPT]]
3
+ [--model-specific-opt [MODEL_SPECIFIC_OPT]] [--statebuffer-scratch-size-in-bytes [STATEBUFFER_SCRATCH_SIZE_IN_BYTES]] [--target-mm-vec-size [TARGET_MM_VEC_SIZE]]
4
+ [--disable-global-redundant-load-elimination | --no-disable-global-redundant-load-elimination] [--avoid-loop-reduce | --no-avoid-loop-reduce]
5
+ [--disable-partition-vectorization | --no-disable-partition-vectorization]
6
+ [--disable-tiling-of-non-overlapping-mem-access | --no-disable-tiling-of-non-overlapping-mem-access] [--weight-coalescing-threshold [WEIGHT_COALESCING_THRESHOLD]]
7
+ [--static-weights | --no-static-weights] [--tensor-layout-p-order [TENSOR_LAYOUT_P_ORDER]] [--tensor-layout-b-order [TENSOR_LAYOUT_B_ORDER]]
8
+ [--tensor-layout-f-order [TENSOR_LAYOUT_F_ORDER]] [--fp32-cast [FP32_CAST]] [--enable-replication | --no-enable-replication]
9
+ [--use-inferentia-hwm | --no-use-inferentia-hwm] [--hbm-scratchpad-page-size-in-bytes [HBM_SCRATCHPAD_PAGE_SIZE_IN_BYTES]]
10
+ [--enable-tensorized-spiller | --no-enable-tensorized-spiller] [--disable-spill-free-kernels | --no-disable-spill-free-kernels]
11
+ [--enable-smt-allocator | --no-enable-smt-allocator] [--nki-manual-allocation | --no-nki-manual-allocation] [--enable-softmax-kernel | --no-enable-softmax-kernel]
12
+ [--softmax-division-delay | --no-softmax-division-delay] [--accumulate-on-alu-dtype | --no-accumulate-on-alu-dtype]
13
+ [--enable-shard-axis-verifier | --no-enable-shard-axis-verifier] [--non-local-tripcount-threshold [NON_LOCAL_TRIPCOUNT_THRESHOLD]]
14
+ [--force-non-local-tensors [FORCE_NON_LOCAL_TENSORS]] [--force-concat-to-non-local | --no-force-concat-to-non-local]
15
+ [--force-all-matmult-input-non-local | --no-force-all-matmult-input-non-local] [--large-1d-tensor-threshold [LARGE_1D_TENSOR_THRESHOLD]] [--dump-after [DUMP_AFTER]]
16
+ [--dump-path [DUMP_PATH]] [--dump-files | --no-dump-files] [--save-weights | --no-save-weights] [--dump-nki | --no-dump-nki]
17
+ [--auto-reduce-crash | --no-auto-reduce-crash] [--debug-mode | --no-debug-mode] [--profile-pass [PROFILE_PASS]] [--rollback-pass [ROLLBACK_PASS]]
18
+ [--skip-pass [SKIP_PASS]] [--debug-pass [DEBUG_PASS]] [--max-prefetch-size-in-bytes [MAX_PREFETCH_SIZE_IN_BYTES]]
19
+ [--max-indirect-dma-prefetch-size-in-bytes [MAX_INDIRECT_DMA_PREFETCH_SIZE_IN_BYTES]] [--max-statebuffer-tile-size-in-bytes [MAX_STATEBUFFER_TILE_SIZE_IN_BYTES]]
20
+ [--max-computation-tile-size [MAX_COMPUTATION_TILE_SIZE]] [--max-local-tensor-tile-size-in-bytes [MAX_LOCAL_TENSOR_TILE_SIZE_IN_BYTES]]
21
+ [--max-prefetch-buffer-size-in-bytes [MAX_PREFETCH_BUFFER_SIZE_IN_BYTES]] [--enable-trivial-dmacopy-transpose | --no-enable-trivial-dmacopy-transpose]
22
+ [--enable-dmacopy-transpose | --no-enable-dmacopy-transpose] [--target-arithmetic-intensity [TARGET_ARITHMETIC_INTENSITY]]
23
+ [--disable-experimental-addr-calc | --no-disable-experimental-addr-calc] [--pool-buffer-size [POOL_BUFFER_SIZE]] [--disable-new-scatter | --no-disable-new-scatter]
24
+ [--enable-stream-transpose | --no-enable-stream-transpose] [--enable-transpose-reduce | --no-enable-transpose-reduce]
25
+ [--enable-transpose-batchnormstats2 | --no-enable-transpose-batchnormstats2] [--force-transpose-batchnormstats2 | --no-force-transpose-batchnormstats2]
26
+ [--mm-transpose-type [MM_TRANSPOSE_TYPE]] [--enable-fp32-mm-transpose | --no-enable-fp32-mm-transpose] [--disable-dma-cast | --no-disable-dma-cast]
27
+ [--enable-8bit-tensorcopy-cast | --no-enable-8bit-tensorcopy-cast] [--min-allreduce-tile-size-in-byte [MIN_ALLREDUCE_TILE_SIZE_IN_BYTE]]
28
+ [--min-allgather-tile-size-in-byte [MIN_ALLGATHER_TILE_SIZE_IN_BYTE]] [--max-inflight-allreduce [MAX_INFLIGHT_ALLREDUCE]]
29
+ [--max-dma-access-free-depth [MAX_DMA_ACCESS_FREE_DEPTH]] [--dve-bn-stats-paritition-max-elements [DVE_BN_STATS_PARITITION_MAX_ELEMENTS]]
30
+ [--max-batch-norm-reduction-size [MAX_BATCH_NORM_REDUCTION_SIZE]] [--spmd | --no-spmd] [--prioritize-minimize-transpose | --no-prioritize-minimize-transpose]
31
+ [--enable-ccop-compute-overlap | --no-enable-ccop-compute-overlap] [--enable-fine-grained-ccop-compute-overlap | --no-enable-fine-grained-ccop-compute-overlap]
32
+ [--fine-grained-ccop-compute-channels-per-ccop [FINE_GRAINED_CCOP_COMPUTE_CHANNELS_PER_CCOP]]
33
+ [--enable-dse-after-mask-propagation | --no-enable-dse-after-mask-propagation] [--enable-dge-on-io-dma | --no-enable-dge-on-io-dma]
34
+ [--enable-dge-on-spill-reload-dma | --no-enable-dge-on-spill-reload-dma] [--enable-dge-on-indirect-dma | --no-enable-dge-on-indirect-dma]
35
+ [--enable-dge-on-vector-indirect-dma | --no-enable-dge-on-vector-indirect-dma] [--enable-dge-on-dst-reduce | --no-enable-dge-on-dst-reduce]
36
+ [--enable-scalar-dge-vectorization | --no-enable-scalar-dge-vectorization] [--enable-dram-to-dram-transpose | --no-enable-dram-to-dram-transpose]
37
+ [--run-pg-layout-and-tiling | --no-run-pg-layout-and-tiling] [--disable-delinearize-io-tensors | --no-disable-delinearize-io-tensors]
38
+ [--delinearize-tensor-maximum-rank [DELINEARIZE_TENSOR_MAXIMUM_RANK]] [--delinearize-min-dim-size [DELINEARIZE_MIN_DIM_SIZE]]
39
+ [--delinearize-maximum-loop-depth [DELINEARIZE_MAXIMUM_LOOP_DEPTH]] [--big-tensor-threshold-one-d-memcpy [BIG_TENSOR_THRESHOLD_ONE_D_MEMCPY]]
40
+ [--disable-degraded-fusion | --no-disable-degraded-fusion] [--disable-tensor-op-io-reshape | --no-disable-tensor-op-io-reshape]
41
+ [--disable-non-compatible-tensor-op-io-reshape | --no-disable-non-compatible-tensor-op-io-reshape] [--dont-delinearize-tensor | --no-dont-delinearize-tensor]
42
+ [--disable-single-row-matmult | --no-disable-single-row-matmult] [--disable-single-column-matmult | --no-disable-single-column-matmult]
43
+ [--enable-penguin-mac-count | --no-enable-penguin-mac-count] [--min-tc-threshold [MIN_TC_THRESHOLD]]
44
+ [--disable-dropout-pattern-match | --no-disable-dropout-pattern-match] [--set-dropout-rate-as-keep | --no-set-dropout-rate-as-keep]
45
+ [--enable-advanced-delinearization | --no-enable-advanced-delinearization] [--keep-rng-tensor-op | --no-keep-rng-tensor-op]
46
+ [--big-tensor-threshold-one-d [BIG_TENSOR_THRESHOLD_ONE_D]] [--bir-json-version [BIR_JSON_VERSION]] [--dump-ccop-axes-group-graph | --no-dump-ccop-axes-group-graph]
47
+ [--cnn-training-model | --no-cnn-training-model] [--enable-all-reduce-axes-as-par | --no-enable-all-reduce-axes-as-par]
48
+ [--enable-pag-based-layout-analysis | --no-enable-pag-based-layout-analysis] [--enable-tiling-visualization | --no-enable-tiling-visualization]
49
+ [--enable-edge-dump | --no-enable-edge-dump] [--override-pg-tile-size [OVERRIDE_PG_TILE_SIZE]] [--enable-p-to-pp-broadcast | --no-enable-p-to-pp-broadcast]
50
+ [--partial-loop-fusion-max-iter [PARTIAL_LOOP_FUSION_MAX_ITER]] [--cast-to-round | --no-cast-to-round] [--keep-remat-dma-transpose | --no-keep-remat-dma-transpose]
51
+ [--disable-lower-transpose-to-shuffle | --no-disable-lower-transpose-to-shuffle] [--disable-bitcasted-transpose | --no-disable-bitcasted-transpose]
52
+ [--enable-bitcasted-transpose-all | --no-enable-bitcasted-transpose-all] [--enable-saturation-convert | --no-enable-saturation-convert]
53
+ [--max-tiling-permutation [MAX_TILING_PERMUTATION]] [--loop-order-heuristic [LOOP_ORDER_HEURISTIC]] [--disable-max-stride-tiling | --no-disable-max-stride-tiling]
54
+ [--flatten-single-column-dma | --no-flatten-single-column-dma] [--keep-builtins [KEEP_BUILTINS]] [--experimental-gpsimd-library [EXPERIMENTAL_GPSIMD_LIBRARY]]
55
+ [--internal_dynamic_dma_scratch_size_per_partition [INTERNAL_DYNAMIC_DMA_SCRATCH_SIZE_PER_PARTITION]]
56
+ [--internal-allow-rmsnorm-cascaded-reduce | --no-internal-allow-rmsnorm-cascaded-reduce] [--softmax-epsilon [SOFTMAX_EPSILON]]
57
+ [--max-dma-duplication [MAX_DMA_DUPLICATION]] [--max-weight-rewrite-permutation [MAX_WEIGHT_REWRITE_PERMUTATION]]
58
+ [--log-tiling-bottleneck-info | --no-log-tiling-bottleneck-info] [--inst-count-limit [INST_COUNT_LIMIT]] [--macro-instance-limit [MACRO_INSTANCE_LIMIT]]
59
+ [--always-transpose | --no-always-transpose] [--enable-prefetch-block-tensors | --no-enable-prefetch-block-tensors]
60
+ [--max-dma-legalization-permutation [MAX_DMA_LEGALIZATION_PERMUTATION]] [--disable-vectorize-dge-dma | --vectorize-dge-dma]
61
+ [--eager-tkg-vectorize-dma | --no-eager-tkg-vectorize-dma] [--no-fine-grained-cc-spill | --no-no-fine-grained-cc-spill]
62
+ [--layout-complexity-warning-threshold [LAYOUT_COMPLEXITY_WARNING_THRESHOLD]] [--partition const dim candidate threshold [PARTITION CONST DIM CANDIDATE THRESHOLD]]
63
+ [--run-layout-viewer | --no-run-layout-viewer] [--non-local-num-loadstores-threshold [NON_LOCAL_NUM_LOADSTORES_THRESHOLD]]
64
+ [--disable-degraded-flatten-axes | --no-disable-degraded-flatten-axes] [--use-accurate-reduce-cost-model | --no-use-accurate-reduce-cost-model]
65
+ [--visualize-detailed-pag-graph | --no-visualize-detailed-pag-graph] [--visualize-simplified-pag-graph | --no-visualize-simplified-pag-graph]
66
+ [--visualize-undecided-cc-graph | --no-visualize-undecided-cc-graph] [--disable-prefer-par-on-non-broadcast | --no-disable-prefer-par-on-non-broadcast]
67
+ [--cycle-based-layout-solution-size-threshold [CYCLE_BASED_LAYOUT_SOLUTION_SIZE_THRESHOLD]]
68
+ [--split-ucc-tensor-size-threshold-in-bytes [SPLIT_UCC_TENSOR_SIZE_THRESHOLD_IN_BYTES]] [--minimum-legal-par-tripcount [MINIMUM_LEGAL_PAR_TRIPCOUNT]]
69
+ [--operator-fution-split-ratio [OPERATOR_FUTION_SPLIT_RATIO]] [--keep-tensor-names | --no-keep-tensor-names] [--show-scalar-values | --no-show-scalar-values]
70
+ [--one-tensor-per-line | --no-one-tensor-per-line] [--no-ssa-style | --no-no-ssa-style] [--no-collapse-like-dims | --no-no-collapse-like-dims]
71
+ [--keep-offloaded-mem-intrinsics | --no-keep-offloaded-mem-intrinsics] [--no-color-terminal | --no-no-color-terminal]
72
+ [--dump-sharding-decision-graph | --no-dump-sharding-decision-graph] [--shard-axes [SHARD_AXES]]
73
+ [--experimental-sharding-propagation | --no-experimental-sharding-propagation] [--mem-bound-ratio-for-mm-sharding [MEM_BOUND_RATIO_FOR_MM_SHARDING]]
74
+ [--enable-lower-shard-axis-before-fusion | --no-enable-lower-shard-axis-before-fusion] [--enable-nki-attention-kernel | --no-enable-nki-attention-kernel]
75
+ [--enable-software-pipelining | --no-enable-software-pipelining] [--internal-lnc-pad-sendrecv | --no-internal-lnc-pad-sendrecv]
76
+ [--enable-send-recv-cce | --no-enable-send-recv-cce] [--use-ilp-layout-search | --no-use-ilp-layout-search]
77
+ [--set-nki-shard-on-producer-consumer | --no-set-nki-shard-on-producer-consumer]
78
+ [--insert-offloaded-transpose-dma-free-threshold [INSERT_OFFLOADED_TRANSPOSE_DMA_FREE_THRESHOLD]] [--enable-cast-in-select | --no-enable-cast-in-select]
79
+ [--delinear-contract-dim | --no-delinear-contract-dim] [--vectorize-partitions | --no-vectorize-partitions]
80
+ [--internal-disable-double-row-gen3 | --no-internal-disable-double-row-gen3] [--internal-autotune | --no-internal-autotune]
81
+ [--internal-autotune-config [INTERNAL_AUTOTUNE_CONFIG]] [--internal-autotune-subprocess [INTERNAL_AUTOTUNE_SUBPROCESS]]
82
+ [--internal-autotune-extraction-process [INTERNAL_AUTOTUNE_EXTRACTION_PROCESS]] [--tf-dma-size-in-bytes [TF_DMA_SIZE_IN_BYTES]]
83
+ [--tf-low-memory-pressure-threshold [TF_LOW_MEMORY_PRESSURE_THRESHOLD]] [--enable-isl-in-injective-check | --no-enable-isl-in-injective-check]
84
+ [--enable-symbolic-memory-pressure-estimation-tf | --no-enable-symbolic-memory-pressure-estimation-tf]
85
+ [--allow-ccrank-axis-tritium-fusion | --no-allow-ccrank-axis-tritium-fusion]
86
+ [--internal-autotune-tritium-use-more-tripcounts | --no-internal-autotune-tritium-use-more-tripcounts]
87
+ [--internal-autotune-tritium-only-with-id [INTERNAL_AUTOTUNE_TRITIUM_ONLY_WITH_ID]] [--vectorize-strided-dma | --no-vectorize-strided-dma]
88
+ [--profile-smt | --no-profile-smt] [--number-of-devices [NUMBER_OF_DEVICES]] [--cc-pipeline-tiling-factor [CC_PIPELINE_TILING_FACTOR]]
89
+ [--no-cc-pipeline-tiling-for-fsdp | --no-no-cc-pipeline-tiling-for-fsdp] [--cc-pipeline-tiling-for-fsdp-only | --no-cc-pipeline-tiling-for-fsdp-only]
90
+ [--experimental-convolution-kernel-match | --no-experimental-convolution-kernel-match] [--disable-inline-cast | --no-disable-inline-cast]
91
+ [--disable-affine-select | --no-disable-affine-select] [--profile-memory-pressure | --no-profile-memory-pressure]
92
+ [--report-n-lowest-utilization [REPORT_N_LOWEST_UTILIZATION]] [--vectorize-direct-dma | --no-vectorize-direct-dma]
93
+ [--log-top-n-latency-dmas [LOG_TOP_N_LATENCY_DMAS]] [--low-psum-usage-threshold [LOW_PSUM_USAGE_THRESHOLD]]
94
+ [--warn-parallelism-threshold [WARN_PARALLELISM_THRESHOLD]] [--disable-square-matmul | --no-disable-square-matmul]
95
+ [--disable-vector-transpose | --no-disable-vector-transpose] [--disable-software-replication | --no-disable-software-replication]
96
+ [--internal-disable-fma-on-ios | --no-internal-disable-fma-on-ios] [--nki-dl | --no-nki-dl] [--disable-tiling-allreduce | --no-disable-tiling-allreduce]
97
+ [--annotate-no-spill-hint | --no-annotate-no-spill-hint] [--print-nki | --no-print-nki] [--nki-debug-mode | --no-nki-debug-mode]
98
+ [--ccop-bucketing | --no-ccop-bucketing] [--fp32-cast-input-tensors | --no-fp32-cast-input-tensors] [--enable-tritium-loopfusion | --no-enable-tritium-loopfusion]
99
+ [--enable-ternary-fission | --no-enable-ternary-fission] [--disable-insert-implicit-shard-axis | --no-disable-insert-implicit-shard-axis]
100
+ [--enable-hoist-wlo-all-gather | --no-enable-hoist-wlo-all-gather] [--enable-hoist-fsdp-collectives | --no-enable-hoist-fsdp-collectives]
101
+ [--disable-concat-delinearizer | --no-disable-concat-delinearizer] [--enable-aliasing-dependency-verifier | --no-enable-aliasing-dependency-verifier]
102
+ [--enable-must-alias-to-iobuffer | --no-enable-must-alias-to-iobuffer] [--disable-partition-locality-tiling | --no-disable-partition-locality-tiling]
103
+ [--enable-memory-pressure-driven-loop-fusion | --no-enable-memory-pressure-driven-loop-fusion] [--legalize-tensor-tensor-op | --no-legalize-tensor-tensor-op]
104
+ [--layout-transform-heuristic [LAYOUT_TRANSFORM_HEURISTIC]] [--disable-bir-codegen-loadstore | --no-disable-bir-codegen-loadstore]
105
+ [--dump-tensorizer-bir-json | --no-dump-tensorizer-bir-json] [--disable-rank-id-rewriting | --no-disable-rank-id-rewriting]
106
+ [--vectorization-size [VECTORIZATION_SIZE]] [--atol [ATOL]] [--rtol [RTOL]] [--save-locals | --no-save-locals]
107
+ [--no-simplify-before-simulation | --no-no-simplify-before-simulation] [--correct-precision-mode | --no-correct-precision-mode]
108
+ [--dont-verify-after-all | --no-dont-verify-after-all] [--disable-debug-info-dump | --no-disable-debug-info-dump] [--run-pass-list [RUN_PASS_LIST]]
109
+ [--dump-pass-list [DUMP_PASS_LIST]] [--dump-pass-list-and-exit | --no-dump-pass-list-and-exit] [--print-stats | --no-print-stats]
110
+ [--run-simulator-after [RUN_SIMULATOR_AFTER]] [--enable-peephole-inst-combine | --no-enable-peephole-inst-combine]
111
+ [--enable-repartitioning | --no-enable-repartitioning] [--no-ccop-barrier | --no-no-ccop-barrier]
112
+ [--enable-iobuffer-to-must-alias | --no-enable-iobuffer-to-must-alias] [--custom-script [CUSTOM_SCRIPT]] [--enable-bir-converter [ENABLE_BIR_CONVERTER]]
113
+ [--custom-compute [CUSTOM_COMPUTE]] [--enable-bircodegen-unroll [ENABLE_BIRCODEGEN_UNROLL]] [--fuse-param-to-neff | --no-fuse-param-to-neff]
114
+ [--only-compile-subgraph [ONLY_COMPILE_SUBGRAPH]] [--model-type-transformer | --no-model-type-transformer] [--model-type-cnn-training | --no-model-type-cnn-training]
115
+ [--distribution-type-llm-training | --no-distribution-type-llm-training] [--num-neuroncores-per-sengine [NUM_NEURONCORES_PER_SENGINE]]
116
+ neuronx-cc: error: argument --cc-pipeline-tiling-factor: invalid int value: '2--vectorize-strided-dma'
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73521137a0627d1cbcf3276af2044ea2e025b43384d5ba149c1ee9f28e06ae23
3
+ size 88353
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b66d4b1a0323238c3a55ab0dc7f54b938f398ac7b79ed19aed77d248df0ddc12
3
+ size 308224
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e68080a74eeee69a4cca47ffb58ac94d4475079ee317556d3c4985d658030a7
3
+ size 52641
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1faffbfaeb70a15963aaa15126906cc088054168366d15b32f56ff2235d63f96
3
+ size 185344
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75310b79ede900f855f7d33db734190e3d8b1f8c38de1d620c7660449b8abde
3
+ size 195539
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91462061b8dd8d78b36005464d30793234a3b8d0d65025605c7a747756879de4
3
+ size 88814
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fccf83e1f68a07690baf4dba83c9edb5a4a0dcb4f9b868628e780b46605ac229
3
+ size 236544
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]